# B.Tech-Project-III/thirdeye/backend/agents/cross_group_analyst.py

"""Cross-Group Analyst Agent — detects blind spots between multiple teams."""

import logging
from backend.providers import call_llm
from backend.db.chroma import get_all_signals, get_group_ids
from backend.db.models import CrossGroupInsight
from backend.agents.json_utils import extract_json_object

# Module-level logger; the name is a fixed dotted path rather than __name__.
logger = logging.getLogger("thirdeye.agents.cross_group_analyst")

# System prompt sent as the "system" message by analyze_cross_group().
# It enumerates the five insight types the model may report and the JSON
# shape of the reply: a top-level "insights" list whose items carry "type",
# "description", "group_a", "group_b", "severity" and "recommendation" --
# the same keys analyze_cross_group() reads when building CrossGroupInsight
# objects. This text is runtime data: editing it changes model behaviour.
SYSTEM_PROMPT = """You are the Cross-Group Intelligence Analyst for ThirdEye. This is the MOST IMPORTANT analysis.

You receive intelligence summaries from MULTIPLE Telegram groups. Your job is to find BLIND SPOTS — information in one group that should be in another.

Detect:
- blocked_handoff: Team A waiting for something from Team B, but Team B doesn't know
- conflicting_decision: Team A decided X, Team B decided the opposite
- information_silo: Critical info in Group A never reached Group B
- promise_reality_gap: Promise made in one group, but another group shows it's blocked
- duplicated_effort: Two teams working on similar things unknowingly

Respond ONLY with valid JSON (no markdown):
{"insights": [{"type": "insight_type", "description": "SPECIFIC description naming the groups, people, and topics", "group_a": {"name": "group_name", "evidence": "what was said"}, "group_b": {"name": "group_name", "evidence": "what was said or NOT said"}, "severity": "warning|critical", "recommendation": "Specific action"}]}

If no cross-group issues: {"insights": []}
Be SPECIFIC. Name the groups, people, topics, and exact conflicts."""


# Substring cues matched against each group's lower-cased, concatenated
# signal text.
_WAITING_CUES = ("waiting", "blocked", "design spec", "specs")
_SPEC_CUES = ("design spec", "specs", "design")
_PROMISE_CUES = ("demo", "friday", "promised", "told the client", "ready by")
_BLOCKER_CUES = ("blocked", "waiting", "can't proceed", "cannot proceed")

# Signal "type" values treated as operational risk vs. planning focus by the
# information-silo heuristic.
_OPERATIONAL_RISK_TYPES = frozenset(
    {"recurring_bug", "workaround", "tech_debt", "deployment_risk"}
)
_PLANNING_FOCUS_TYPES = frozenset(
    {"feature_request", "roadmap_drift", "priority_conflict", "user_pain_point"}
)

# Upper bound on the number of heuristic insights returned to the caller.
_MAX_HEURISTIC_INSIGHTS = 5


def _contains_any(text: str, cues: tuple) -> bool:
    """Return True when at least one cue occurs as a substring of ``text``."""
    return any(cue in text for cue in cues)


def _profile_group(signals) -> tuple:
    """Collapse one group's signals into ``(lower-cased text, set of types)``."""
    documents = []
    signal_types = set()
    for signal in signals or []:
        if not isinstance(signal, dict):
            continue  # malformed entry: nothing readable in it
        documents.append(str(signal.get("document") or ""))
        metadata = signal.get("metadata")
        if not isinstance(metadata, dict):
            # Key may be present with a None/garbage value; treat as no metadata.
            metadata = {}
        signal_types.add(str(metadata.get("type", "unknown")).lower())
    return " ".join(documents).lower(), signal_types


def _blocked_handoff(waiting: str, other: str, profiles: dict) -> list:
    """Flag ``waiting`` as blocked on specs/design that ``other`` never mentions."""
    waiting_text = profiles[waiting][0]
    other_text = profiles[other][0]
    if not _contains_any(waiting_text, _WAITING_CUES):
        return []
    if _contains_any(other_text, _SPEC_CUES):
        return []
    return [
        CrossGroupInsight(
            type="blocked_handoff",
            description=(
                f"{waiting} indicates dependency blockage (design/spec inputs), "
                f"but {other} has no corresponding discussion of that dependency."
            ),
            group_a={
                "name": waiting,
                "evidence": "Contains waiting/blocked language tied to specs or design dependency.",
            },
            group_b={
                "name": other,
                "evidence": "No clear mention of design specs/dependency handoff in available signals.",
            },
            severity="warning",
            recommendation=(
                f"Create a shared handoff item between {waiting} and {other} for design/spec ownership "
                "with an explicit due date."
            ),
        )
    ]


def _promise_reality_gap(blocked: str, promising: str, profiles: dict) -> list:
    """Flag delivery promises in ``promising`` that blockers in ``blocked`` may break."""
    blocked_text = profiles[blocked][0]
    promising_text = profiles[promising][0]
    if not _contains_any(promising_text, _PROMISE_CUES):
        return []
    if not _contains_any(blocked_text, _BLOCKER_CUES):
        return []
    return [
        CrossGroupInsight(
            type="promise_reality_gap",
            description=(
                f"{promising} signals delivery promises while {blocked} reports blockers that may prevent those commitments."
            ),
            group_a={
                "name": blocked,
                "evidence": "Signals include active blockers/waiting dependencies.",
            },
            group_b={
                "name": promising,
                "evidence": "Signals include explicit client/demo commitments and timelines.",
            },
            severity="critical",
            recommendation="Run a joint risk review and re-baseline commitments before the next client update.",
        )
    ]


def _information_silo(risky: str, planning: str, profiles: dict) -> list:
    """Flag operational-risk signal types in ``risky`` alongside planning types in ``planning``."""
    risk_types = profiles[risky][1].intersection(_OPERATIONAL_RISK_TYPES)
    planning_types = profiles[planning][1].intersection(_PLANNING_FOCUS_TYPES)
    if not risk_types or not planning_types:
        return []
    return [
        CrossGroupInsight(
            type="information_silo",
            description=(
                f"{risky} shows operational risk signals while {planning} is focused on planning/user demands, "
                "suggesting risk context is not shared across groups."
            ),
            group_a={
                "name": risky,
                "evidence": f"Operational risk signal types: {sorted(risk_types)}",
            },
            group_b={
                "name": planning,
                "evidence": f"Planning-focused signal types: {sorted(planning_types)}",
            },
            severity="warning",
            recommendation="Add a weekly cross-functional risk sync so product planning reflects current engineering constraints.",
        )
    ]


def _heuristic_cross_group_insights(
    group_summaries: dict[str, list[dict]],
) -> list[CrossGroupInsight]:
    """Generate best-effort cross-group insights when LLM output is unavailable.

    Every unordered pair of groups is run through three keyword/type
    heuristics (blocked handoff, promise-vs-reality gap, information silo).
    Each heuristic is evaluated in BOTH directions, so the outcome no longer
    depends on the insertion order of ``group_summaries``.

    Args:
        group_summaries: Mapping of group name to its list of signal dicts.
            Each signal is read via its ``"document"`` text and
            ``"metadata"["type"]``; missing, ``None`` or non-dict values are
            tolerated so this fallback cannot itself raise on malformed data.

    Returns:
        At most ``_MAX_HEURISTIC_INSIGHTS`` insights, de-duplicated on
        ``(type, group_a name, group_b name)``, in detection order.
    """
    profiles = {
        name: _profile_group(signals) for name, signals in group_summaries.items()
    }
    names = list(profiles)
    detectors = (_blocked_handoff, _promise_reality_gap, _information_silo)

    insights: list[CrossGroupInsight] = []
    for idx, first in enumerate(names):
        for second in names[idx + 1:]:
            directions = ((first, second), (second, first))
            # Detector-major order keeps forward-direction findings in the
            # same relative order as before; reverse findings are interleaved.
            for detector in detectors:
                for source, target in directions:
                    insights.extend(detector(source, target, profiles))

    unique = []
    seen_keys = set()
    for insight in insights:
        key = (insight.type, insight.group_a.get("name"), insight.group_b.get("name"))
        if key in seen_keys:
            continue
        seen_keys.add(key)
        unique.append(insight)

    return unique[:_MAX_HEURISTIC_INSIGHTS]


# Prompt-size limits: signals included per group, and characters kept per signal.
_MAX_SIGNALS_PER_GROUP = 30
_MAX_SIGNAL_CHARS = 120


async def analyze_cross_group(
    group_summaries: dict[str, list[dict]] = None,
) -> list[CrossGroupInsight]:
    """
    Analyze intelligence across all monitored groups to find blind spots.

    Builds a per-group text summary, asks the "reasoning" LLM for a JSON list
    of insights, and converts each entry into a CrossGroupInsight. Any
    exception raised while calling the LLM or handling its reply switches to
    the keyword heuristics in _heuristic_cross_group_insights().

    Args:
        group_summaries: Optional pre-built mapping of group name to its list
            of signal dicts (each read via "metadata" and "document"). If
            None, groups and signals are loaded with get_group_ids() /
            get_all_signals(); these calls are made synchronously.

    Returns:
        Insights from the LLM, or from the heuristic fallback when the LLM
        path fails. An empty list when fewer than two groups are available.
    """
    if group_summaries is None:
        group_ids = get_group_ids()
        if len(group_ids) < 2:
            logger.info("Need at least 2 groups for cross-group analysis")
            return []
        group_summaries = {gid: get_all_signals(gid) for gid in group_ids}

    if len(group_summaries) < 2:
        return []

    # Format summaries for the LLM. Signals are read defensively so a single
    # malformed record cannot abort the analysis before the fallback exists.
    summary_parts = []
    for group_name, signals in group_summaries.items():
        group_signals = signals or []
        signal_lines = []
        for signal in group_signals[:_MAX_SIGNALS_PER_GROUP]:
            if not isinstance(signal, dict):
                continue
            meta = signal.get("metadata")
            if not isinstance(meta, dict):
                meta = {}
            text = str(signal.get("document") or "")
            signal_lines.append(
                f"  - [{meta.get('type', '?')}] {text[:_MAX_SIGNAL_CHARS]}"
            )

        summary_parts.append(
            f"=== GROUP: {group_name} ({len(group_signals)} total signals) ===\n"
            + "\n".join(signal_lines)
        )

    full_summary = "\n\n".join(summary_parts)

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"Analyze cross-group intelligence:\n\n{full_summary}",
        },
    ]

    # Bound before the try so the handler can inspect it without locals().
    result = None
    try:
        result = await call_llm(
            "reasoning",
            messages,
            temperature=0.2,
            max_tokens=2000,
            response_format={"type": "json_object"},
        )
        parsed = extract_json_object(result.get("content", ""))

        insights = []
        # "insights": null and stray non-dict entries are skipped instead of
        # throwing away every well-formed insight in the same reply.
        for item in parsed.get("insights") or []:
            if not isinstance(item, dict):
                continue
            insights.append(
                CrossGroupInsight(
                    type=item.get("type") or "unknown",
                    description=item.get("description") or "",
                    group_a=item.get("group_a") or {},
                    group_b=item.get("group_b") or {},
                    severity=item.get("severity") or "warning",
                    recommendation=item.get("recommendation") or "",
                )
            )

        logger.info("Cross-group analysis found %d insights", len(insights))
        return insights

    except Exception as e:
        # Deliberately broad: this is a best-effort boundary, and any failure
        # (transport, JSON extraction, model validation) degrades to heuristics.
        raw = ""
        if isinstance(result, dict):
            raw = str(result.get("content", ""))[:300].replace("\n", " ")
        logger.info(
            "Cross-group LLM parse issue, using fallback: %s; raw_head=%s", e, raw
        )
        fallback = _heuristic_cross_group_insights(group_summaries)
        if fallback:
            logger.info(
                "Cross-group heuristic fallback produced %d insights", len(fallback)
            )
        return fallback