# B.Tech-Project-III/thirdeye/backend/agents/query_agent.py

"""
Query Agent — voice-aware signal context formatting for ThirdEye.

Provides _format_signal_for_context() which labels each ChromaDB signal with
its true origin (voice note, document, web link, meeting, chat) so the LLM can produce
properly attributed answers like:
  "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided..."
"""
from datetime import datetime


# Prompt fragment telling the LLM to cite voice-sourced signals by speaker,
# date and duration. The "[VOICE NOTE — @name on Date (Xs)]" tag it refers to
# is the prefix _format_signal_for_context() emits when source == "voice".
# NOTE(review): presumably appended to the Query Agent's system prompt — the
# call site is not visible in this file; confirm against the caller.
VOICE_CITATION_INSTRUCTION = """
When context includes [VOICE NOTE — @name on Date (Xs)] signals, ALWAYS cite the voice note explicitly.
Example: "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided to use PostgreSQL."
Never flatten voice signals into generic "the team discussed" language. Always name the speaker and source.
"""


def _signal_date_label(timestamp: object) -> str:
    """Return a short date label for *timestamp*, or "" when it is empty."""
    if not timestamp:
        return ""
    # Coerce before parsing: a non-string value (e.g. a numeric epoch) has no
    # .replace() and cannot be sliced, which previously escaped the fallback.
    text = str(timestamp)
    try:
        parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
    except ValueError:
        # Not ISO-8601: fall back to the first 10 characters as a best effort.
        return text[:10]
    return parsed.strftime("%b %d")


def _format_signal_for_context(signal: dict) -> str:
    """
    Format a ChromaDB signal as a context snippet for the Query Agent LLM.

    Voice-sourced signals get explicit speaker/date/duration attribution so the
    LLM can cite them correctly. Accepts both flat signal dicts and dicts with
    a nested 'metadata' key; every field is looked up in the metadata first and
    then on the top-level dict.

    Args:
        signal: Flat signal dict, or a record shaped like
            {"metadata": {...}, ...}. A missing, None or non-dict 'metadata'
            value is treated as "no nested metadata".

    Returns:
        A single-line snippet prefixed with one of the tags
        [VOICE NOTE — ...], [DOCUMENT — ...], [WEB LINK — ...],
        [MEETING ... — ...] or [CHAT — ...], followed by "[<type>] <summary>".
    """
    # A present-but-None 'metadata' would otherwise make every .get() below
    # raise AttributeError, so fall back to the flat dict in that case too.
    meta = signal.get("metadata")
    if not isinstance(meta, dict):
        meta = signal

    def field(key, default):
        # Nested metadata wins; the top-level dict is the fallback.
        return meta.get(key, signal.get(key, default))

    source = field("source", "chat")
    sig_type = field("type", "unknown")
    summary = field("summary", "")
    date_str = _signal_date_label(field("timestamp", ""))

    if source == "voice":
        speaker = field("speaker", "Unknown")
        duration = field("voice_duration", 0)
        duration_str = f"{duration}s" if duration else "?"
        return (
            f"[VOICE NOTE — @{speaker} on {date_str} ({duration_str})] "
            f"[{sig_type}] {summary}"
        )

    if source == "document":
        return f"[DOCUMENT — {date_str}] [{sig_type}] {summary}"

    if source == "link":
        return f"[WEB LINK — {date_str}] [{sig_type}] {summary}"

    # Meeting signals are recognised by type rather than by source.
    if sig_type in ("meet_decision", "meet_action_item", "meet_blocker", "meet_summary"):
        meeting_id = field("meeting_id", "")
        return f"[MEETING {meeting_id} — {date_str}] [{sig_type}] {summary}"

    # Everything else is treated as chat; the first entity is used as sender.
    entities = field("entities", [])
    if isinstance(entities, str):
        # Entities may arrive JSON-encoded as a string; decode best-effort.
        import json
        try:
            entities = json.loads(entities)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            entities = []
    # Only a non-empty list/tuple has a meaningful first entity; a decoded
    # dict, number or string would raise or yield a stray character on [0].
    sender_str = entities[0] if isinstance(entities, (list, tuple)) and entities else ""
    return f"[CHAT — {sender_str} on {date_str}] [{sig_type}] {summary}"