"""Web Search Agent — Tavily integration for real-time web context.""" import logging from backend.config import TAVILY_API_KEY, ENABLE_WEB_SEARCH logger = logging.getLogger("thirdeye.agents.web_search") _tavily_client = None def _get_client(): global _tavily_client if _tavily_client is None and TAVILY_API_KEY and len(TAVILY_API_KEY) > 5: try: from tavily import TavilyClient _tavily_client = TavilyClient(api_key=TAVILY_API_KEY) logger.info("Tavily client initialized") except ImportError: logger.error("tavily-python not installed. Run: pip install tavily-python") except Exception as e: logger.error(f"Tavily client init failed: {e}") return _tavily_client async def search_web(query: str, max_results: int = 5) -> list[dict]: """ Search the web using Tavily and return structured results. Args: query: Search query string max_results: Max results to return (1-10) Returns: List of {title, url, content, score} dicts, sorted by relevance """ if not ENABLE_WEB_SEARCH: logger.info("Web search is disabled via feature flag") return [] client = _get_client() if not client: logger.warning("Tavily client not available (missing API key or install)") return [] try: response = client.search( query=query, max_results=max_results, search_depth="basic", # "basic" is faster + free-tier friendly; "advanced" for deeper include_answer=False, include_raw_content=False, ) results = [] for r in response.get("results", []): results.append({ "title": r.get("title", ""), "url": r.get("url", ""), "content": r.get("content", ""), "score": r.get("score", 0.0), }) logger.info(f"Tavily returned {len(results)} results for: {query[:60]}") return results except Exception as e: logger.error(f"Tavily search failed: {e}") return [] def format_search_results_for_llm(results: list[dict]) -> str: """Format Tavily results into context string for the Query Agent.""" if not results: return "" parts = [] for i, r in enumerate(results): content_preview = r["content"][:500] if r["content"] else "No content" parts.append( f"[Web Result {i+1}] {r['title']}\n" f"Source: {r['url']}\n" f"Content: {content_preview}" ) return "\n\n".join(parts)