init

2026-04-19 20:51:49 +00:00 · 2026-04-05 00:43:23 +05:30
commit 8be37d3e92
425 changed files with 101853 additions and 0 deletions
--- a/thirdeye/backend/agents/cross_group_analyst.py
+++ b/thirdeye/backend/agents/cross_group_analyst.py
@@ -0,0 +1,287 @@
+"""Cross-Group Analyst Agent — detects blind spots between multiple teams."""
+
+import logging
+from backend.providers import call_llm
+from backend.db.chroma import get_all_signals, get_group_ids
+from backend.db.models import CrossGroupInsight
+from backend.agents.json_utils import extract_json_object
+
+# Module-level logger; the name is a fixed dotted path rather than __name__.
+logger = logging.getLogger("thirdeye.agents.cross_group_analyst")
+
+# System prompt sent as the "system" message by analyze_cross_group().
+# It enumerates the five insight types the model should detect and pins the
+# reply format to a single JSON object: {"insights": [...]}. Each entry carries
+# the keys type, description, group_a, group_b, severity and recommendation,
+# which are the same keys analyze_cross_group() reads when it builds
+# CrossGroupInsight objects from the parsed reply.
+SYSTEM_PROMPT = """You are the Cross-Group Intelligence Analyst for ThirdEye. This is the MOST IMPORTANT analysis.
+
+You receive intelligence summaries from MULTIPLE Telegram groups. Your job is to find BLIND SPOTS — information in one group that should be in another.
+
+Detect:
+- blocked_handoff: Team A waiting for something from Team B, but Team B doesn't know
+- conflicting_decision: Team A decided X, Team B decided the opposite
+- information_silo: Critical info in Group A never reached Group B
+- promise_reality_gap: Promise made in one group, but another group shows it's blocked
+- duplicated_effort: Two teams working on similar things unknowingly
+
+Respond ONLY with valid JSON (no markdown):
+{"insights": [{"type": "insight_type", "description": "SPECIFIC description naming the groups, people, and topics", "group_a": {"name": "group_name", "evidence": "what was said"}, "group_b": {"name": "group_name", "evidence": "what was said or NOT said"}, "severity": "warning|critical", "recommendation": "Specific action"}]}
+
+If no cross-group issues: {"insights": []}
+Be SPECIFIC. Name the groups, people, topics, and exact conflicts."""
+
+
+# Lexical cues (matched as lower-case substrings) used by the fallback heuristics.
+_WAITING_CUES = ("waiting", "blocked", "design spec", "specs")
+_SPEC_CUES = ("design spec", "specs", "design")
+_PROMISE_CUES = ("demo", "friday", "promised", "told the client", "ready by")
+_BLOCKER_CUES = ("blocked", "waiting", "can't proceed", "cannot proceed")
+
+# Signal types (lower-case) used for type-based silo detection.
+_OPERATIONAL_RISK_TYPES = frozenset(
+    {"recurring_bug", "workaround", "tech_debt", "deployment_risk"}
+)
+_PLANNING_TYPES = frozenset(
+    {"feature_request", "roadmap_drift", "priority_conflict", "user_pain_point"}
+)
+
+# Upper bound on the number of insights the heuristic fallback returns.
+_MAX_HEURISTIC_INSIGHTS = 5
+
+
+def _mentions_any(text: str, cues: tuple[str, ...]) -> bool:
+    """Return True when at least one cue occurs as a substring of *text*."""
+    return any(cue in text for cue in cues)
+
+
+def _directional_insights(
+    source: str,
+    source_profile: tuple[str, set[str]],
+    counterpart: str,
+    counterpart_profile: tuple[str, set[str]],
+) -> list[CrossGroupInsight]:
+    """Run all heuristics for one ordered (source, counterpart) pair of groups.
+
+    *source* is the group expected to show the blocker/risk; *counterpart* is
+    the group expected to be unaware of it (or to be making promises). Each
+    profile is a ``(lower-cased text, set of lower-cased signal types)`` tuple.
+    """
+    source_text, source_types = source_profile
+    other_text, other_types = counterpart_profile
+    found: list[CrossGroupInsight] = []
+
+    # Detect a likely blocked handoff around design/spec dependencies.
+    if _mentions_any(source_text, _WAITING_CUES) and not _mentions_any(
+        other_text, _SPEC_CUES
+    ):
+        found.append(
+            CrossGroupInsight(
+                type="blocked_handoff",
+                description=(
+                    f"{source} indicates dependency blockage (design/spec inputs), "
+                    f"but {counterpart} has no corresponding discussion of that dependency."
+                ),
+                group_a={
+                    "name": source,
+                    "evidence": "Contains waiting/blocked language tied to specs or design dependency.",
+                },
+                group_b={
+                    "name": counterpart,
+                    "evidence": "No clear mention of design specs/dependency handoff in available signals.",
+                },
+                severity="warning",
+                recommendation=(
+                    f"Create a shared handoff item between {source} and {counterpart} for design/spec ownership "
+                    "with an explicit due date."
+                ),
+            )
+        )
+
+    # Detect likely promise vs execution mismatch.
+    if _mentions_any(other_text, _PROMISE_CUES) and _mentions_any(
+        source_text, _BLOCKER_CUES
+    ):
+        found.append(
+            CrossGroupInsight(
+                type="promise_reality_gap",
+                description=(
+                    f"{counterpart} signals delivery promises while {source} reports blockers that may prevent those commitments."
+                ),
+                group_a={
+                    "name": source,
+                    "evidence": "Signals include active blockers/waiting dependencies.",
+                },
+                group_b={
+                    "name": counterpart,
+                    "evidence": "Signals include explicit client/demo commitments and timelines.",
+                },
+                severity="critical",
+                recommendation="Run a joint risk review and re-baseline commitments before the next client update.",
+            )
+        )
+
+    # Type-based silo detection when lexical cues are weak.
+    risk_hits = source_types & _OPERATIONAL_RISK_TYPES
+    planning_hits = other_types & _PLANNING_TYPES
+    if risk_hits and planning_hits:
+        found.append(
+            CrossGroupInsight(
+                type="information_silo",
+                description=(
+                    f"{source} shows operational risk signals while {counterpart} is focused on planning/user demands, "
+                    "suggesting risk context is not shared across groups."
+                ),
+                group_a={
+                    "name": source,
+                    "evidence": f"Operational risk signal types: {sorted(risk_hits)}",
+                },
+                group_b={
+                    "name": counterpart,
+                    "evidence": f"Planning-focused signal types: {sorted(planning_hits)}",
+                },
+                severity="warning",
+                recommendation="Add a weekly cross-functional risk sync so product planning reflects current engineering constraints.",
+            )
+        )
+
+    return found
+
+
+def _heuristic_cross_group_insights(
+    group_summaries: dict[str, list[dict]],
+) -> list[CrossGroupInsight]:
+    """Generate best-effort cross-group insights when LLM output is unavailable.
+
+    Every unordered pair of groups is examined in BOTH directions, so the
+    result does not depend on the insertion order of *group_summaries*. Three
+    heuristics are applied per direction:
+
+    - blocked_handoff: one group's text contains waiting/blocked/spec cues
+      while the other group's text never mentions design or specs.
+    - promise_reality_gap: one group's text contains blocker cues while the
+      other group's text contains delivery-promise cues.
+    - information_silo: one group has operational-risk signal types while the
+      other has planning-focused signal types.
+
+    Args:
+        group_summaries: Mapping of group name to its list of signal dicts.
+            Each signal is read through its "document" entry and its
+            "metadata" -> "type" entry. A missing or None "metadata" is
+            treated as empty (type "unknown").
+
+    Returns:
+        At most five CrossGroupInsight objects in discovery order. In every
+        insight, group_a names the group showing the blocker/risk and group_b
+        names the counterpart group.
+    """
+    # Pre-compute, per group, the lower-cased concatenated text and the set of
+    # lower-cased signal types so each pair comparison is a cheap lookup.
+    profiles: dict[str, tuple[str, set[str]]] = {}
+    for group_name, signals in group_summaries.items():
+        text = " ".join(str(s.get("document", "")) for s in signals).lower()
+        types = {
+            # `or {}` guards against an explicit "metadata": None entry, which
+            # would otherwise raise AttributeError inside the fallback path.
+            str((s.get("metadata") or {}).get("type", "unknown")).lower()
+            for s in signals
+        }
+        profiles[group_name] = (text, types)
+
+    insights: list[CrossGroupInsight] = []
+    group_names = list(profiles)
+    for idx, first in enumerate(group_names):
+        for second in group_names[idx + 1 :]:
+            # Each ordered pair is visited exactly once, so every
+            # (type, group_a, group_b) combination is emitted at most once and
+            # no separate de-duplication pass is required.
+            for source, counterpart in ((first, second), (second, first)):
+                insights.extend(
+                    _directional_insights(
+                        source,
+                        profiles[source],
+                        counterpart,
+                        profiles[counterpart],
+                    )
+                )
+
+    return insights[:_MAX_HEURISTIC_INSIGHTS]
+
+
+def _format_group_summaries(group_summaries: dict[str, list[dict]]) -> str:
+    """Render each group's signals as a text section for the LLM prompt."""
+    sections = []
+    for group_name, signals in group_summaries.items():
+        signal_lines = []
+        for s in signals[:30]:  # Limit per group to fit context
+            # Tolerate signals with missing/None metadata or document so that
+            # one malformed record cannot abort the whole analysis.
+            meta = s.get("metadata") or {}
+            document = str(s.get("document") or "")
+            signal_lines.append(f"  - [{meta.get('type', '?')}] {document[:120]}")
+
+        sections.append(
+            f"=== GROUP: {group_name} ({len(signals)} total signals) ===\n"
+            + "\n".join(signal_lines)
+        )
+
+    return "\n\n".join(sections)
+
+
+async def analyze_cross_group(
+    group_summaries: dict[str, list[dict]] = None,
+) -> list[CrossGroupInsight]:
+    """
+    Analyze intelligence across all monitored groups to find blind spots.
+
+    The per-group signals are formatted into a prompt (at most 30 signals per
+    group, 120 characters per signal) and sent to the "reasoning" LLM together
+    with SYSTEM_PROMPT. The JSON reply is converted into CrossGroupInsight
+    objects. If the LLM call or the parsing of its reply raises, the keyword
+    based heuristic fallback is used instead.
+
+    Args:
+        group_summaries: Optional pre-built summaries. If None, loads from ChromaDB.
+
+    Returns:
+        The insights reported by the LLM, the heuristic fallback's insights
+        when the LLM path fails, or an empty list when fewer than two groups
+        are available.
+    """
+    if group_summaries is None:
+        group_ids = get_group_ids()
+        if len(group_ids) < 2:
+            logger.info("Need at least 2 groups for cross-group analysis")
+            return []
+
+        group_summaries = {gid: get_all_signals(gid) for gid in group_ids}
+
+    if len(group_summaries) < 2:
+        return []
+
+    # Format summaries for the LLM
+    full_summary = _format_group_summaries(group_summaries)
+
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {
+            "role": "user",
+            "content": f"Analyze cross-group intelligence:\n\n{full_summary}",
+        },
+    ]
+
+    # Bound before the try so the except block can inspect it without
+    # resorting to locals() introspection.
+    result = None
+    try:
+        result = await call_llm(
+            "reasoning",
+            messages,
+            temperature=0.2,
+            max_tokens=2000,
+            response_format={"type": "json_object"},
+        )
+        parsed = extract_json_object(result.get("content", ""))
+        insights = [
+            CrossGroupInsight(
+                type=item.get("type", "unknown"),
+                description=item.get("description", ""),
+                group_a=item.get("group_a", {}),
+                group_b=item.get("group_b", {}),
+                severity=item.get("severity", "warning"),
+                recommendation=item.get("recommendation", ""),
+            )
+            for item in parsed.get("insights", [])
+        ]
+    except Exception as e:
+        # Deliberate best-effort boundary: any failure in the LLM call or in
+        # parsing its reply degrades to the heuristic analysis.
+        raw = ""
+        if isinstance(result, dict):
+            raw = str(result.get("content", ""))[:300].replace("\n", " ")
+        logger.warning(
+            f"Cross-group LLM parse issue, using fallback: {e}; raw_head={raw}"
+        )
+        fallback = _heuristic_cross_group_insights(group_summaries)
+        if fallback:
+            logger.info(
+                f"Cross-group heuristic fallback produced {len(fallback)} insights"
+            )
+        return fallback
+
+    logger.info(f"Cross-group analysis found {len(insights)} insights")
+    return insights