mirror of
https://github.com/arkorty/B.Tech-Project-III.git
synced 2026-04-19 12:41:48 +00:00
69 lines
2.8 KiB
Python
69 lines
2.8 KiB
Python
"""
Query Agent — voice-aware signal context formatting for ThirdEye.

Provides _format_signal_for_context() which labels each ChromaDB signal with
its true origin (voice note, document, web link, meeting, chat) so the LLM can
produce properly attributed answers like:
    "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided..."
"""
|
|
from datetime import datetime
|
|
|
|
|
|
# Instruction text intended for the LLM: it asks for explicit citation of
# voice-note signals. The bracketed tag described here mirrors the
# "[VOICE NOTE — @speaker on date (duration)]" prefix that
# _format_signal_for_context() emits for signals whose source is "voice",
# so the two must be kept in sync.
VOICE_CITATION_INSTRUCTION = """
When context includes [VOICE NOTE — @name on Date (Xs)] signals, ALWAYS cite the voice note explicitly.
Example: "Based on what @Raj said in a voice note on Mar 14 (45s), the team decided to use PostgreSQL."
Never flatten voice signals into generic "the team discussed" language. Always name the speaker and source.
"""
|
|
|
|
|
|
def _format_signal_for_context(signal: dict) -> str:
|
|
"""
|
|
Format a ChromaDB signal as a context snippet for the Query Agent LLM.
|
|
Voice-sourced signals get explicit attribution so the LLM cites them correctly.
|
|
Accepts both flat signal dicts and dicts with a nested 'metadata' key.
|
|
"""
|
|
# Support both flat dicts and ChromaDB-style {"metadata": {...}, "document": ...}
|
|
meta = signal.get("metadata", signal)
|
|
|
|
source = meta.get("source", signal.get("source", "chat"))
|
|
sig_type = meta.get("type", signal.get("type", "unknown"))
|
|
summary = meta.get("summary", signal.get("summary", ""))
|
|
timestamp = meta.get("timestamp", signal.get("timestamp", ""))
|
|
|
|
date_str = ""
|
|
if timestamp:
|
|
try:
|
|
dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
|
date_str = dt.strftime("%b %d")
|
|
except Exception:
|
|
date_str = timestamp[:10]
|
|
|
|
if source == "voice":
|
|
speaker = meta.get("speaker", signal.get("speaker", "Unknown"))
|
|
duration = meta.get("voice_duration", signal.get("voice_duration", 0))
|
|
duration_str = f"{duration}s" if duration else "?"
|
|
return (
|
|
f"[VOICE NOTE — @{speaker} on {date_str} ({duration_str})] "
|
|
f"[{sig_type}] {summary}"
|
|
)
|
|
|
|
if source == "document":
|
|
return f"[DOCUMENT — {date_str}] [{sig_type}] {summary}"
|
|
|
|
if source == "link":
|
|
return f"[WEB LINK — {date_str}] [{sig_type}] {summary}"
|
|
|
|
if sig_type in ("meet_decision", "meet_action_item", "meet_blocker", "meet_summary"):
|
|
meeting_id = meta.get("meeting_id", signal.get("meeting_id", ""))
|
|
return f"[MEETING {meeting_id} — {date_str}] [{sig_type}] {summary}"
|
|
|
|
entities_raw = meta.get("entities", signal.get("entities", []))
|
|
if isinstance(entities_raw, str):
|
|
import json
|
|
try:
|
|
entities_raw = json.loads(entities_raw)
|
|
except Exception:
|
|
entities_raw = []
|
|
sender_str = entities_raw[0] if entities_raw else ""
|
|
return f"[CHAT — {sender_str} on {date_str}] [{sig_type}] {summary}"
|