"""
Query Agent — voice-aware signal context formatting for ThirdEye.

Provides _format_signal_for_context() which labels each ChromaDB signal with
its true origin (voice note, document, meeting, chat) so the LLM can produce
properly attributed answers like:
    "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided..."
"""

import json
from datetime import datetime

VOICE_CITATION_INSTRUCTION = """
When context includes [VOICE NOTE — @name on Date (Xs)] signals, ALWAYS cite the voice note explicitly.
Example: "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided to use PostgreSQL."
Never flatten voice signals into generic "the team discussed" language. Always name the speaker and source.
"""

# Signal types that are rendered with a [MEETING ...] label.
_MEETING_SIGNAL_TYPES = (
    "meet_decision",
    "meet_action_item",
    "meet_blocker",
    "meet_summary",
)


def _signal_field(meta: dict, signal: dict, key: str, default):
    """Return *key* from *meta*, else from the flat *signal*, else *default*."""
    return meta.get(key, signal.get(key, default))


def _format_date(timestamp) -> str:
    """Render *timestamp* as a short date such as ``"Mar 14"``.

    An empty/missing timestamp yields ``""``. A value that cannot be parsed
    as ISO-8601 is returned as its first 10 characters instead.
    """
    if not timestamp:
        return ""
    # Coerce first: stored metadata may hold a datetime or a number, and the
    # slice fallback below must never fail on a non-string value.
    text = str(timestamp)
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z", so spell
        # the UTC offset out explicitly.
        parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        return parsed.strftime("%b %d")
    except ValueError:
        return text[:10]


def _first_entity(entities):
    """Return the first entity of a list (or JSON-encoded list), else ``""``.

    Entities may arrive as a real list or as a JSON string. Anything that
    does not decode to a non-empty list/tuple produces an empty sender rather
    than an indexing error.
    """
    if isinstance(entities, str):
        try:
            entities = json.loads(entities)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return ""
    if isinstance(entities, (list, tuple)) and entities:
        return entities[0]
    return ""


def _format_signal_for_context(signal: dict) -> str:
    """
    Format a ChromaDB signal as a context snippet for the Query Agent LLM.

    Voice-sourced signals get explicit attribution so the LLM cites them
    correctly. Accepts both flat signal dicts and dicts with a nested
    'metadata' key; a 'metadata' value that is not a dict (e.g. None) is
    ignored and the flat keys are used instead.

    Args:
        signal: The signal dict. Fields read (from 'metadata' first, then the
            top level): source, type, summary, timestamp, and depending on
            the origin speaker, voice_duration, meeting_id, entities.

    Returns:
        A single-line snippet that starts with an origin label:
        [VOICE NOTE — ...], [DOCUMENT — ...], [WEB LINK — ...],
        [MEETING ...] or [CHAT — ...], followed by "[type] summary".
    """
    # Support both flat dicts and ChromaDB-style {"metadata": {...}, "document": ...}
    nested = signal.get("metadata")
    meta = nested if isinstance(nested, dict) else signal

    source = _signal_field(meta, signal, "source", "chat")
    sig_type = _signal_field(meta, signal, "type", "unknown")
    summary = _signal_field(meta, signal, "summary", "")
    date_str = _format_date(_signal_field(meta, signal, "timestamp", ""))

    if source == "voice":
        speaker = _signal_field(meta, signal, "speaker", "Unknown")
        duration = _signal_field(meta, signal, "voice_duration", 0)
        # A missing or zero duration is shown as "?" rather than "0s".
        duration_str = f"{duration}s" if duration else "?"
        return (
            f"[VOICE NOTE — @{speaker} on {date_str} ({duration_str})] "
            f"[{sig_type}] {summary}"
        )

    if source == "document":
        return f"[DOCUMENT — {date_str}] [{sig_type}] {summary}"

    if source == "link":
        return f"[WEB LINK — {date_str}] [{sig_type}] {summary}"

    if sig_type in _MEETING_SIGNAL_TYPES:
        meeting_id = _signal_field(meta, signal, "meeting_id", "")
        return f"[MEETING {meeting_id} — {date_str}] [{sig_type}] {summary}"

    # Everything else is treated as chat; the first entity is used as sender.
    sender_str = _first_entity(_signal_field(meta, signal, "entities", []))
    return f"[CHAT — {sender_str} on {date_str}] [{sig_type}] {summary}"