"""Signal Extractor Agent — extracts structured signals from chat messages.""" import logging from backend.providers import call_llm from backend.db.models import Signal from datetime import datetime from backend.agents.json_utils import extract_json_object logger = logging.getLogger("thirdeye.agents.signal_extractor") # Lens-specific system prompts LENS_PROMPTS = { "dev": """You are the Signal Extractor for ThirdEye operating in DevLens mode. You analyze batches of developer team chat messages and extract STRUCTURED SIGNALS. Extract ONLY signals that represent meaningful technical information. Skip greetings, small talk, emoji reactions, and meta-conversation. Signal types to look for: - architecture_decision: Technology choices, design decisions with rationale - tech_debt: Shortcuts, hardcoded values, "will fix later" patterns - knowledge_silo_evidence: Only one person discusses a critical topic - recurring_bug: Same issue mentioned repeatedly - stack_decision: Technology/framework choices (proposed or decided) - deployment_risk: Risky deployment practices - workaround: Temporary fixes being applied repeatedly - delivery_commitment: A team member gives an explicit or estimated timeline/ETA ("will take 2 days", "done by Friday", "approx 3 hours") Pay SPECIAL attention to delivery commitments — capture the person's name, the work item, and the exact time estimate. For EACH signal found, include it in the JSON array. If NO meaningful signals exist, return empty array. Be SELECTIVE. Quality over quantity.""", "product": """You are the Signal Extractor for ThirdEye operating in ProductLens mode. Signal types to look for: - feature_request: Features users or team members are asking for - delivery_commitment: A team member gives an explicit or estimated timeline/ETA ("will take 2 days", "done by Friday", "approx 3 hours") - user_pain_point: User difficulties, complaints, confusion - roadmap_drift: Discussion of topics not on the current plan - priority_conflict: Team members disagreeing on what's most important - metric_mention: Specific numbers, conversion rates, performance data - user_quote: Direct quotes from users/customers - competitor_intel: Mentions of competitor actions or features Pay SPECIAL attention to delivery commitments — capture the person's name, the work item, and the exact time estimate. Be SELECTIVE. Quality over quantity.""", "client": """You are the Signal Extractor for ThirdEye operating in ClientLens mode. Signal types to look for: - promise: Commitments made with deadlines (explicit or implicit) - scope_creep: Additional requests introduced casually without formal change requests - sentiment_signal: Tone changes (positive praise, growing frustration, formality shifts) - unanswered_request: Questions or requests that haven't received responses - satisfaction: Explicit positive or negative feedback - escalation_risk: Mentions of involving management, expressing deadline concerns - client_decision: Decisions made by the client Pay SPECIAL attention to implicit deadlines ("by end of week", "before the meeting"). Be SELECTIVE. Quality over quantity.""", "community": """You are the Signal Extractor for ThirdEye operating in CommunityLens mode. Signal types: recommendation, event, issue, local_knowledge, question Be SELECTIVE. Quality over quantity.""", } EXTRACTION_FORMAT = """ Respond ONLY with valid JSON in this exact format (no markdown, no backticks, no explanation): {"signals": [{"type": "signal_type_here", "summary": "One clear sentence that includes specific names, numbers, timelines, and commitments", "entities": ["@person", "technology"], "severity": "low|medium|high|critical", "status": "proposed|decided|implemented|unresolved", "raw_quote": "Exact verbatim sentence(s) from the message that capture the full claim, including names, numbers, and timelines", "message_index": 0}]} IMPORTANT for raw_quote: copy the FULL relevant sentence from the message, not just a topic keyword. Example — message "Anirban: feature page revamp will take approx 2 more days" WRONG raw_quote: "feature page revamp" CORRECT raw_quote: "feature page revamp will take approx 2 more days" If no signals found: {"signals": []} """ async def extract_signals(messages_text: str, group_id: str, lens: str = "dev") -> list[Signal]: """ Extract structured signals from a batch of formatted chat messages. Args: messages_text: Formatted string like "[Alex]: Let's use Redis\\n[Bob]: Agreed" group_id: Telegram group ID lens: Active lens mode (dev, product, client, community) Returns: List of Signal objects """ system_prompt = LENS_PROMPTS.get(lens, LENS_PROMPTS["dev"]) messages = [ {"role": "system", "content": system_prompt + "\n\n" + EXTRACTION_FORMAT}, {"role": "user", "content": f"Extract signals from these messages:\n\n{messages_text}"}, ] try: result = await call_llm("fast_large", messages, temperature=0.2, max_tokens=2000) parsed = extract_json_object(result.get("content", "")) raw_signals = parsed.get("signals", []) # Convert to Signal objects signals = [] for raw in raw_signals: try: signal = Signal( group_id=group_id, lens=lens, type=raw.get("type", "unknown"), summary=raw.get("summary", ""), entities=raw.get("entities", []), severity=raw.get("severity", "low"), status=raw.get("status", "unknown"), raw_quote=raw.get("raw_quote", ""), timestamp=datetime.utcnow().isoformat(), ) signals.append(signal) except Exception as e: logger.warning(f"Failed to parse signal: {e}") continue logger.info(f"Extracted {len(signals)} signals from {group_id} (lens={lens}) via {result['provider']}") return signals except Exception as e: logger.error(f"Signal extraction failed: {e}") return []