This commit is contained in:
2026-04-05 00:43:23 +05:30
commit 8be37d3e92
425 changed files with 101853 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
"""
Query Agent — voice-aware signal context formatting for ThirdEye.
Provides _format_signal_for_context(), which labels each ChromaDB signal with
its true origin (voice note, document, web link, meeting, chat) so the LLM can produce
properly attributed answers like:
"Based on what @Raj said in a voice note on Mar 14 (45s), the team decided..."
"""
from datetime import datetime
# Prompt fragment instructing the LLM how to cite voice-note signals. The
# "[VOICE NOTE — @name on Date (Xs)]" tag it refers to is the prefix that
# _format_signal_for_context() emits for signals whose source is "voice".
# NOTE(review): presumably appended to the Query Agent prompt by a caller
# outside this file — confirm where it is consumed.
VOICE_CITATION_INSTRUCTION = """
When context includes [VOICE NOTE — @name on Date (Xs)] signals, ALWAYS cite the voice note explicitly.
Example: "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided to use PostgreSQL."
Never flatten voice signals into generic "the team discussed" language. Always name the speaker and source.
"""
def _signal_date_label(timestamp: object) -> str:
    """Return a short display date (e.g. "Mar 14") for a signal timestamp."""
    if not timestamp:
        return ""
    if isinstance(timestamp, datetime):
        return timestamp.strftime("%b %d")
    # Non-string values (e.g. numeric epochs) are stringified instead of being
    # allowed to raise on .replace() or slicing.
    text = str(timestamp)
    try:
        # datetime.fromisoformat() only accepts a trailing "Z" on Python 3.11+,
        # so normalise it to an explicit UTC offset first.
        parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
    except ValueError:
        # Not ISO-8601: fall back to the first 10 characters, which is the
        # YYYY-MM-DD part of most date-like strings.
        return text[:10]
    return parsed.strftime("%b %d")


def _first_entity(entities: object) -> object:
    """Return the first entry of a signal's entity list, or "" if there is none."""
    import json  # function-scope import, as in the original implementation

    # Entities may arrive JSON-encoded as a string (presumably because they
    # were stored as scalar metadata — confirm against the writer).
    if isinstance(entities, str):
        try:
            entities = json.loads(entities)
        except (ValueError, RecursionError):
            return ""
    # Only index real sequences: a decoded dict would raise KeyError on [0]
    # and a decoded bare string would yield just its first character.
    if isinstance(entities, (list, tuple)) and entities:
        return entities[0]
    return ""


def _format_signal_for_context(signal: dict) -> str:
    """
    Format a ChromaDB signal as a context snippet for the Query Agent LLM.

    Voice-sourced signals get explicit attribution so the LLM cites them correctly.
    Accepts both flat signal dicts and dicts with a nested 'metadata' key; every
    field is looked up in the metadata first and then on the signal itself.

    Output shapes, chosen in this order:
      source == "voice"    -> "[VOICE NOTE — @speaker on date (Ns)] [type] summary"
      source == "document" -> "[DOCUMENT — date] [type] summary"
      source == "link"     -> "[WEB LINK — date] [type] summary"
      type is meet_*       -> "[MEETING meeting_id — date] [type] summary"
      anything else        -> "[CHAT — first_entity on date] [type] summary"

    Args:
        signal: Signal dict. Recognised keys are source, type, summary,
            timestamp, speaker, voice_duration, meeting_id and entities.

    Returns:
        A single-line context snippet. Missing fields render as empty strings
        (or "Unknown" / "?" for the voice speaker / duration).
    """
    # Support both flat dicts and ChromaDB-style {"metadata": {...}, "document": ...}.
    # A missing, None or non-dict "metadata" value falls back to the signal itself.
    meta = signal.get("metadata")
    if not isinstance(meta, dict):
        meta = signal

    def field(key: str, default: object) -> object:
        # Metadata wins; the top-level signal is the fallback.
        return meta.get(key, signal.get(key, default))

    source = field("source", "chat")
    sig_type = field("type", "unknown")
    summary = field("summary", "")
    date_str = _signal_date_label(field("timestamp", ""))

    if source == "voice":
        speaker = field("speaker", "Unknown")
        duration = field("voice_duration", 0)
        duration_str = f"{duration}s" if duration else "?"
        return (
            f"[VOICE NOTE — @{speaker} on {date_str} ({duration_str})] "
            f"[{sig_type}] {summary}"
        )

    if source == "document":
        return f"[DOCUMENT — {date_str}] [{sig_type}] {summary}"

    if source == "link":
        return f"[WEB LINK — {date_str}] [{sig_type}] {summary}"

    if sig_type in ("meet_decision", "meet_action_item", "meet_blocker", "meet_summary"):
        meeting_id = field("meeting_id", "")
        # Keep the id and the date visibly separated; previously they were
        # concatenated with nothing in between (e.g. "abc-123Mar 14").
        header = f"MEETING {meeting_id}" if meeting_id else "MEETING"
        if date_str:
            header = f"{header} — {date_str}"
        return f"[{header}] [{sig_type}] {summary}"

    # Default: treat as a chat message; the first entity is used as the sender.
    sender_str = _first_entity(field("entities", []))
    return f"[CHAT — {sender_str} on {date_str}] [{sig_type}] {summary}"