This commit is contained in:
2026-04-05 00:43:23 +05:30
commit 8be37d3e92
425 changed files with 101853 additions and 0 deletions

View File

@@ -0,0 +1,84 @@
"""Web Search Agent — Tavily integration for real-time web context."""
import asyncio
import logging

from backend.config import TAVILY_API_KEY, ENABLE_WEB_SEARCH
# Module-level logger registered under the "thirdeye.agents.web_search" name.
logger = logging.getLogger("thirdeye.agents.web_search")
# Cached Tavily client instance. Starts as None and is filled in by
# _get_client() the first time a client is constructed successfully.
_tavily_client = None
def _get_client():
    """Return the shared Tavily client, building it on first use.

    The client is cached in the module-level ``_tavily_client``. Construction
    is only attempted when ``TAVILY_API_KEY`` is set and longer than 5
    characters. Import or construction failures are logged, not raised, and
    leave the cache empty so a later call will try again.

    Returns:
        The cached client, or None when no client could be created.
    """
    global _tavily_client

    # Fast path: a client was already built by an earlier call.
    if _tavily_client is not None:
        return _tavily_client

    # No key (or an obviously too-short one): nothing to build.
    if not TAVILY_API_KEY or len(TAVILY_API_KEY) <= 5:
        return None

    try:
        # Imported lazily so the module still loads without tavily-python.
        from tavily import TavilyClient

        _tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
        logger.info("Tavily client initialized")
    except ImportError:
        logger.error("tavily-python not installed. Run: pip install tavily-python")
    except Exception as exc:
        logger.error(f"Tavily client init failed: {exc}")

    return _tavily_client
async def search_web(query: str, max_results: int = 5) -> list[dict]:
    """
    Search the web using Tavily and return structured results.

    ``client.search`` is a blocking call, so it is executed in a worker
    thread; awaiting it there keeps the event loop free to serve other
    tasks while the HTTP request is in flight.

    Args:
        query: Search query string
        max_results: Max results to return (1-10); forwarded to Tavily as-is
    Returns:
        List of {title, url, content, score} dicts, in the order Tavily
        returned them. An empty list is returned when web search is
        disabled, the query is blank, the client is unavailable, or the
        search fails for any reason (failures are logged, never raised).
    """
    if not ENABLE_WEB_SEARCH:
        logger.info("Web search is disabled via feature flag")
        return []

    # A blank query cannot produce useful results; skip the API call.
    if not query or not query.strip():
        logger.warning("Web search skipped: empty query")
        return []

    client = _get_client()
    if not client:
        logger.warning("Tavily client not available (missing API key or install)")
        return []

    try:
        # Run the synchronous SDK call off the event loop thread.
        response = await asyncio.to_thread(
            client.search,
            query=query,
            max_results=max_results,
            search_depth="basic",  # "basic" is faster + free-tier friendly; "advanced" for deeper
            include_answer=False,
            include_raw_content=False,
        )
        # Keep only the fields downstream code uses, with safe defaults.
        results = [
            {
                "title": r.get("title", ""),
                "url": r.get("url", ""),
                "content": r.get("content", ""),
                "score": r.get("score", 0.0),
            }
            for r in response.get("results", [])
        ]
        logger.info("Tavily returned %d results for: %s", len(results), query[:60])
        return results
    except Exception as e:
        # Best-effort boundary: web context is optional, so any failure
        # degrades to "no results" instead of propagating to the caller.
        logger.error("Tavily search failed: %s", e)
        return []
def format_search_results_for_llm(results: list[dict]) -> str:
    """Format Tavily results into context string for the Query Agent.

    Each result becomes a three-line entry (numbered title, source URL,
    content preview capped at 500 characters); entries are separated by a
    blank line. Missing or ``None`` fields are tolerated: title and URL
    fall back to an empty string, content falls back to "No content".

    Args:
        results: Result dicts with optional "title", "url", "content" keys.
    Returns:
        The formatted context string, or "" when ``results`` is empty.
    """
    if not results:
        return ""

    parts = []
    for number, r in enumerate(results, start=1):
        content = r.get("content")
        # Cap the preview so a single long page cannot dominate the prompt.
        content_preview = content[:500] if content else "No content"
        title = r.get("title") or ""
        url = r.get("url") or ""
        parts.append(
            f"[Web Result {number}] {title}\n"
            f"Source: {url}\n"
            f"Content: {content_preview}"
        )
    return "\n\n".join(parts)