"""Context Detector Agent — auto-classifies group type from messages."""
import logging
import math
import re

from backend.providers import call_llm
from backend.agents.json_utils import extract_json_object

logger = logging.getLogger("thirdeye.agents.context_detector")

SYSTEM_PROMPT = """You analyze a batch of messages from a Telegram group and determine what TYPE of group this is.

CLASSIFY into exactly ONE:
- "dev" — Software engineering team (code, PRs, deployments, bugs, tech stack)
- "product" — Product/business team (features, users, metrics, roadmap, competitors)
- "client" — Client/agency channel (deliverables, timelines, approvals, invoices)
- "community" — Community/interest group (recommendations, events, local info, casual)

Respond ONLY with valid JSON (no markdown, no backticks):
{"detected_lens": "dev|product|client|community", "confidence": 0.0-1.0, "evidence": ["signal1", "signal2", "signal3"]}
"""

VALID_LENSES = {"dev", "product", "client", "community"}

# Confidence reported when the LLM omits the field or returns an unusable value.
_DEFAULT_CONFIDENCE = 0.5

# Keyword signals per lens. Insertion order matters: ties between lenses are
# resolved in favour of the lens listed first.
_LENS_KEYWORDS = {
    "dev": [
        "bug", "deploy", "deployment", "api", "database", "schema", "postgres",
        "mongo", "timeout", "endpoint", "pod", "pr", "code", "docker", "stack",
        "integration",
    ],
    "product": [
        "feature", "roadmap", "user", "users", "client", "customers", "complain",
        "pain", "prioritize", "priority", "enterprise", "competitor", "demo",
        "sso", "dark mode", "mobile", "stability", "integration",
    ],
    "client": [
        "invoice", "deadline", "deliverable", "approval", "sign-off", "scope",
        "payment", "contract", "proposal", "timeline", "meeting",
    ],
    "community": [
        "event", "meetup", "recommend", "anyone", "community", "local",
        "where can i", "suggestion", "friends", "weekend",
    ],
}

# Keywords are matched as whole words (optionally with a common inflection
# suffix) rather than raw substrings. Plain substring counting lets short
# keywords fire inside unrelated words -- "pr" in "proposal"/"approval",
# "bug" in "debug", "event" in "prevent" -- which skews the scores.
_KEYWORD_SUFFIX = r"(?:s|es|d|ed|ing)?"
_LENS_PATTERNS = {
    lens: [
        (
            keyword,
            re.compile(rf"(?<!\w){re.escape(keyword)}{_KEYWORD_SUFFIX}(?!\w)"),
        )
        for keyword in keywords
    ]
    for lens, keywords in _LENS_KEYWORDS.items()
}


def _heuristic_detect_context(messages_text: str) -> dict:
    """Rule-based fallback when LLM output is malformed/unavailable.

    Counts whole-word keyword hits per lens in the lower-cased text and picks
    the lens with the highest total (first-listed lens wins ties).

    Args:
        messages_text: Raw batch of messages; ``None`` or empty is tolerated.

    Returns:
        A dict with ``detected_lens``, ``confidence`` (0.0 when nothing
        matched, otherwise 0.35 + 0.08 per hit, capped at 0.95 and rounded to
        two decimals) and ``evidence`` (up to three matched keywords, or
        ``["heuristic_fallback"]`` when nothing matched).
    """
    text = (messages_text or "").lower()

    hits = {
        lens: {keyword: len(pattern.findall(text)) for keyword, pattern in patterns}
        for lens, patterns in _LENS_PATTERNS.items()
    }
    scores = {lens: sum(counts.values()) for lens, counts in hits.items()}

    best_lens = max(scores, key=scores.get)
    best_score = scores[best_lens]

    if best_score == 0:
        # No signal at all: default to "dev" but make the lack of evidence
        # explicit through a zero confidence.
        return {
            "detected_lens": "dev",
            "confidence": 0.0,
            "evidence": ["heuristic_fallback"],
        }

    evidence = [keyword for keyword, count in hits[best_lens].items() if count][:3]
    confidence = min(0.95, 0.35 + 0.08 * best_score)
    return {
        "detected_lens": best_lens,
        "confidence": round(confidence, 2),
        "evidence": evidence,
    }


def _coerce_confidence(value) -> float:
    """Convert an LLM-supplied confidence to a float clamped to [0.0, 1.0]."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return _DEFAULT_CONFIDENCE
    # NaN slips through float() and defeats min()/max() clamping (it would be
    # reported as 1.0), so treat it like any other unusable value.
    if math.isnan(number):
        return _DEFAULT_CONFIDENCE
    return max(0.0, min(1.0, number))


def _coerce_evidence(value) -> list:
    """Normalize the LLM's evidence field to a list of at most five strings."""
    if value is None:
        return []
    if not isinstance(value, list):
        value = [value]
    return [str(item) for item in value][:5]


async def detect_context(messages_text: str) -> dict:
    """Detect group type from a batch of messages.

    Asks the LLM to classify the batch and validates its JSON answer. If the
    call fails, or the answer is not a JSON object naming one of
    ``VALID_LENSES``, the keyword heuristic is used instead and
    ``"detection_failed"`` is appended to its evidence.

    Args:
        messages_text: Raw batch of messages to classify.

    Returns:
        A dict with ``detected_lens`` (one of ``VALID_LENSES``),
        ``confidence`` (float in [0.0, 1.0]) and ``evidence`` (list of str).
    """
    if not (messages_text or "").strip():
        # Nothing to classify: skip the LLM call and return the
        # zero-confidence heuristic default.
        return _heuristic_detect_context("")

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"Classify this group based on these messages:\n\n{messages_text}",
        },
    ]

    try:
        result = await call_llm(
            "fast_large",
            messages,
            temperature=0.1,
            max_tokens=300,
            response_format={"type": "json_object"},
        )
        parsed = extract_json_object(result.get("content", ""))
        if not isinstance(parsed, dict):
            raise ValueError("LLM response is not a JSON object")

        detected_lens = str(parsed.get("detected_lens", "")).strip().lower()
        if detected_lens not in VALID_LENSES:
            # A missing or unknown lens is malformed output; fall back to the
            # heuristic rather than reporting "dev" with the LLM's confidence.
            raise ValueError(f"unexpected detected_lens: {detected_lens!r}")

        return {
            "detected_lens": detected_lens,
            "confidence": _coerce_confidence(
                parsed.get("confidence", _DEFAULT_CONFIDENCE)
            ),
            "evidence": _coerce_evidence(parsed.get("evidence", [])),
        }
    except Exception as exc:
        # Deliberate catch-all boundary: detection is best-effort and must
        # always produce a classification for the caller.
        logger.error("Context detection failed: %s", exc)
        fallback = _heuristic_detect_context(messages_text)
        fallback["evidence"] = fallback["evidence"] + ["detection_failed"]
        return fallback