# B.Tech-Project-III/thirdeye/backend/agents/meet_cross_ref.py

"""
Meet Cross-Reference Agent
Finds connections between meeting signals and existing Telegram group signals.
Surfaces: confirmations (meeting agrees with chat), contradictions (meeting contradicts chat),
and blind spots (meeting discusses something chat groups don't know about).
"""
import logging
from backend.providers import call_llm
from backend.db.chroma import query_signals, get_all_signals
from backend.config import MEET_CROSS_REF_GROUPS, MEET_DEFAULT_GROUP_ID

# Module-level logger, named under the "thirdeye.agents" hierarchy.
logger = logging.getLogger("thirdeye.agents.meet_cross_ref")

# System prompt passed as the "system" message in find_cross_references().
# It defines the three result categories (confirmations, contradictions,
# blind spots) and the JSON object shape the model is asked to return.
# This text is runtime input to the model: editing it changes model behavior.
CROSS_REF_SYSTEM_PROMPT = """You are an expert at finding connections between meeting discussions and team chat history.

You will receive:
1. MEETING SIGNALS — decisions, action items, blockers, risks from a recent Google Meet
2. CHAT SIGNALS — existing signals from team Telegram groups

Find meaningful connections across three categories:

CONFIRMATIONS: Meeting agrees with or reinforces something from chat history
CONTRADICTIONS: Meeting decision conflicts with what was said/decided in chat
BLIND SPOTS: Important things from the meeting that the chat teams don't seem to know about

Return ONLY a valid JSON object:
{
  "confirmations": [
    {"meeting_signal": "...", "chat_signal": "...", "group": "...", "significance": "high|medium|low"}
  ],
  "contradictions": [
    {"meeting_signal": "...", "chat_signal": "...", "group": "...", "impact": "...", "significance": "high|medium|low"}
  ],
  "blind_spots": [
    {"meeting_signal": "...", "teams_unaware": ["group1", "group2"], "recommendation": "..."}
  ]
}

Rules:
- Only include HIGH confidence matches — do not stretch for weak connections
- Keep each signal description concise (1 sentence max)
- significance "high" = this matters for team alignment; "medium" = worth noting; "low" = minor
- If a category has nothing meaningful, use an empty array []
- Return JSON only"""


# Result categories every cross-reference result must expose as lists.
_CROSS_REF_CATEGORIES = ("confirmations", "contradictions", "blind_spots")

# Metadata "type" values that mark a structured meeting signal (not a raw chunk).
_STRUCTURED_MEET_TYPES = (
    "meet_decision",
    "meet_action_item",
    "meet_blocker",
    "meet_risk",
    "meet_open_q",
)


def _empty_cross_ref(error: str) -> dict:
    """Return a result with all categories empty and *error* attached."""
    return {
        "confirmations": [],
        "contradictions": [],
        "blind_spots": [],
        "error": error,
    }


def _cross_ref_signal_line(signal: dict, limit: int) -> str:
    """Render one signal as an indented ``[type] text`` line, text cut to *limit* chars."""
    # ``or`` (rather than a .get default) also covers keys present with a None value.
    metadata = signal.get("metadata") or {}
    document = signal.get("document") or ""
    return f"  [{metadata.get('type', '?')}] {document[:limit]}"


def _parse_cross_ref_response(content: str) -> dict:
    """Parse the LLM reply into a dict whose three categories are lists of dicts.

    Raises:
        ValueError: if the reply is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError``) or is valid JSON but not an object.
    """
    import json

    raw = content.strip()
    # Some models wrap the JSON in a Markdown code fence despite the instructions.
    if raw.startswith("```"):
        raw = raw.split("```")[1]
        if raw.startswith("json"):
            raw = raw[4:]

    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object from the LLM, got {type(parsed).__name__}"
        )

    # Guarantee the documented shape: a missing, null, or non-list category
    # becomes [], and entries that are not objects are dropped.
    for category in _CROSS_REF_CATEGORIES:
        entries = parsed.get(category)
        if isinstance(entries, list):
            parsed[category] = [e for e in entries if isinstance(e, dict)]
        else:
            parsed[category] = []
    return parsed


async def find_cross_references(
    meeting_id: str,
    group_id: str = None,
    cross_ref_group_ids: list[str] = None,
) -> dict:
    """
    Compare meeting signals against chat group signals.

    Args:
        meeting_id: The meeting to analyze
        group_id: ChromaDB group where meet signals are stored (defaults to MEET_DEFAULT_GROUP_ID)
        cross_ref_group_ids: Groups to compare against (defaults to MEET_CROSS_REF_GROUPS from config)

    Returns:
        Dict with confirmations, contradictions, blind_spots lists (each a list
        of dicts). On any failure the lists are empty and an "error" key holds
        a human-readable message.
    """
    group_id = group_id or MEET_DEFAULT_GROUP_ID
    cross_ref_group_ids = cross_ref_group_ids or MEET_CROSS_REF_GROUPS

    if not cross_ref_group_ids:
        return _empty_cross_ref(
            "No cross-reference groups configured. Set MEET_CROSS_REF_GROUPS in .env"
        )

    # 1. Get meeting signals (decisions, actions, blockers, risks — NOT raw chunks)
    meet_signals = query_signals(group_id, meeting_id, n_results=30)
    structured_meet = [
        s for s in meet_signals
        if (s.get("metadata") or {}).get("type") in _STRUCTURED_MEET_TYPES
    ]

    if not structured_meet:
        return _empty_cross_ref(
            f"No structured signals found for meeting {meeting_id}. Has it been processed yet?"
        )

    # 2. Get signals from each cross-reference group
    chat_context_parts = []
    for gid in cross_ref_group_ids:
        # Best-effort per group: one unreadable group must not abort the whole run.
        try:
            all_sig = get_all_signals(gid)
            if all_sig:
                # Cap at 20 per group to stay within token limits
                formatted = "\n".join(
                    _cross_ref_signal_line(s, 120) for s in all_sig[:20]
                )
                chat_context_parts.append(f"Group '{gid}':\n{formatted}")
        except Exception as e:
            logger.warning("Could not load signals for group %s: %s", gid, e)

    if not chat_context_parts:
        return _empty_cross_ref("Could not load any signals from cross-reference groups.")

    # 3. Format inputs for LLM
    meet_text = "\n".join(_cross_ref_signal_line(s, 150) for s in structured_meet)
    chat_text = "\n\n".join(chat_context_parts)

    prompt = f"""MEETING SIGNALS (from meeting: {meeting_id}):
{meet_text}

CHAT SIGNALS (from monitored Telegram groups):
{chat_text}"""

    try:
        result = await call_llm(
            task_type="reasoning",
            messages=[
                {"role": "system", "content": CROSS_REF_SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=1500,
            response_format={"type": "json_object"},
        )
        return _parse_cross_ref_response(result["content"])

    except Exception as e:
        # Covers provider failures as well as malformed or non-object replies.
        logger.error("Cross-reference LLM call failed: %s", e)
        return _empty_cross_ref(str(e))


def _clip_text(value, limit: int) -> str:
    """Return *value* as text cut to *limit* characters, or "?" when it is None."""
    if value is None:
        return "?"
    return str(value)[:limit]


def _entry_dicts(value) -> list:
    """Return the dict items of *value* if it is a list/tuple, otherwise []."""
    if not isinstance(value, (list, tuple)):
        return []
    return [item for item in value if isinstance(item, dict)]


def format_cross_ref_for_telegram(analysis: dict, meeting_id: str) -> str:
    """Format cross-reference results as a Telegram message.

    Args:
        analysis: Result dict with optional "error", "confirmations",
            "contradictions" and "blind_spots" keys. The content is LLM-generated,
            so missing/null fields and non-dict entries are tolerated: missing
            text renders as "?" and malformed entries are skipped.
        meeting_id: Identifier shown in the message header.

    Returns:
        The message text. At most three entries per category are listed (the
        header of each category still shows the full count), and every entry is
        followed by a blank line whether or not its optional detail line
        (impact / recommendation) is present.

    NOTE(review): signal text is interpolated unescaped inside ``_..._`` markup;
    confirm the sender's parse mode tolerates ``_``/``*`` in LLM-generated text.
    """
    if analysis.get("error"):
        return f"⚠️ Cross-reference failed: {analysis['error']}"

    confirmations = _entry_dicts(analysis.get("confirmations"))
    contradictions = _entry_dicts(analysis.get("contradictions"))
    blind_spots = _entry_dicts(analysis.get("blind_spots"))

    if not (confirmations or contradictions or blind_spots):
        return f"🔗 *Meet ↔ Chat Cross-Reference*\nMeeting `{meeting_id}`: No significant connections found between this meeting and your chat groups."

    parts = [f"🔗 *Meet ↔ Chat Cross-Reference*\nMeeting: `{meeting_id}`\n"]

    if confirmations:
        parts.append(f"✅ *Confirmations* ({len(confirmations)})")
        for c in confirmations[:3]:  # Cap at 3 for readability
            sig = "🔴" if c.get("significance") == "high" else "🟡"
            parts.append(f"{sig} Meeting: _{_clip_text(c.get('meeting_signal'), 100)}_")
            parts.append(
                f"   Matches [{c.get('group', '?')}]: _{_clip_text(c.get('chat_signal'), 100)}_\n"
            )

    if contradictions:
        parts.append(f"⚡ *Contradictions* ({len(contradictions)}) — ACTION NEEDED")
        for c in contradictions[:3]:
            entry = [
                f"🔴 Meeting decided: _{_clip_text(c.get('meeting_signal'), 100)}_",
                f"   BUT [{c.get('group', '?')}] says: _{_clip_text(c.get('chat_signal'), 100)}_",
            ]
            if c.get("impact"):
                entry.append(f"   Impact: {_clip_text(c['impact'], 100)}")
            # Terminate the entry with a blank line even when "impact" is absent.
            entry[-1] += "\n"
            parts.extend(entry)

    if blind_spots:
        parts.append(f"🔦 *Blind Spots* ({len(blind_spots)}) — Teams may not know")
        for b in blind_spots[:3]:
            entry = [f"🟠 {_clip_text(b.get('meeting_signal'), 120)}"]
            if b.get("recommendation"):
                entry.append(f"   → {_clip_text(b['recommendation'], 100)}")
            # Same separator rule as above: always end the entry with a blank line.
            entry[-1] += "\n"
            parts.extend(entry)

    return "\n".join(parts)