# Mirror of https://github.com/arkorty/B.Tech-Project-III.git
# Synced 2026-04-19 12:41:48 +00:00
# 129 lines, 6.0 KiB, Python
"""Signal Extractor Agent — extracts structured signals from chat messages."""
|
|
import logging
|
|
from backend.providers import call_llm
|
|
from backend.db.models import Signal
|
|
from datetime import datetime
|
|
from backend.agents.json_utils import extract_json_object
|
|
|
|
# Module-level logger; the name is a fixed dotted path rather than __name__.
logger = logging.getLogger("thirdeye.agents.signal_extractor")
|
|
|
|
# Lens-specific system prompts.
# Keys are the lens identifiers accepted by extract_signals(); an unknown lens
# falls back to the "dev" entry there. Each value is combined with
# EXTRACTION_FORMAT to form the system message, so any edit to this text
# changes what the model is asked to do.
LENS_PROMPTS = {
    # DevLens: technical signals from developer team chat.
    "dev": """You are the Signal Extractor for ThirdEye operating in DevLens mode.
You analyze batches of developer team chat messages and extract STRUCTURED SIGNALS.

Extract ONLY signals that represent meaningful technical information. Skip greetings, small talk, emoji reactions, and meta-conversation.

Signal types to look for:
- architecture_decision: Technology choices, design decisions with rationale
- tech_debt: Shortcuts, hardcoded values, "will fix later" patterns
- knowledge_silo_evidence: Only one person discusses a critical topic
- recurring_bug: Same issue mentioned repeatedly
- stack_decision: Technology/framework choices (proposed or decided)
- deployment_risk: Risky deployment practices
- workaround: Temporary fixes being applied repeatedly
- delivery_commitment: A team member gives an explicit or estimated timeline/ETA ("will take 2 days", "done by Friday", "approx 3 hours")

Pay SPECIAL attention to delivery commitments — capture the person's name, the work item, and the exact time estimate.
For EACH signal found, include it in the JSON array. If NO meaningful signals exist, return empty array.
Be SELECTIVE. Quality over quantity.""",

    # ProductLens: feature, roadmap and user-feedback signals.
    "product": """You are the Signal Extractor for ThirdEye operating in ProductLens mode.

Signal types to look for:
- feature_request: Features users or team members are asking for
- delivery_commitment: A team member gives an explicit or estimated timeline/ETA ("will take 2 days", "done by Friday", "approx 3 hours")
- user_pain_point: User difficulties, complaints, confusion
- roadmap_drift: Discussion of topics not on the current plan
- priority_conflict: Team members disagreeing on what's most important
- metric_mention: Specific numbers, conversion rates, performance data
- user_quote: Direct quotes from users/customers
- competitor_intel: Mentions of competitor actions or features

Pay SPECIAL attention to delivery commitments — capture the person's name, the work item, and the exact time estimate.
Be SELECTIVE. Quality over quantity.""",

    # ClientLens: promises, scope and sentiment in client conversations.
    "client": """You are the Signal Extractor for ThirdEye operating in ClientLens mode.

Signal types to look for:
- promise: Commitments made with deadlines (explicit or implicit)
- scope_creep: Additional requests introduced casually without formal change requests
- sentiment_signal: Tone changes (positive praise, growing frustration, formality shifts)
- unanswered_request: Questions or requests that haven't received responses
- satisfaction: Explicit positive or negative feedback
- escalation_risk: Mentions of involving management, expressing deadline concerns
- client_decision: Decisions made by the client

Pay SPECIAL attention to implicit deadlines ("by end of week", "before the meeting").
Be SELECTIVE. Quality over quantity.""",

    # CommunityLens: shortest prompt; signal types are listed inline.
    "community": """You are the Signal Extractor for ThirdEye operating in CommunityLens mode.

Signal types: recommendation, event, issue, local_knowledge, question
Be SELECTIVE. Quality over quantity.""",
}
|
|
|
|
EXTRACTION_FORMAT = """
|
|
Respond ONLY with valid JSON in this exact format (no markdown, no backticks, no explanation):
|
|
{"signals": [{"type": "signal_type_here", "summary": "One clear sentence that includes specific names, numbers, timelines, and commitments", "entities": ["@person", "technology"], "severity": "low|medium|high|critical", "status": "proposed|decided|implemented|unresolved", "raw_quote": "Exact verbatim sentence(s) from the message that capture the full claim, including names, numbers, and timelines", "message_index": 0}]}
|
|
|
|
IMPORTANT for raw_quote: copy the FULL relevant sentence from the message, not just a topic keyword.
|
|
Example — message "Anirban: feature page revamp will take approx 2 more days"
|
|
WRONG raw_quote: "feature page revamp"
|
|
CORRECT raw_quote: "feature page revamp will take approx 2 more days"
|
|
|
|
If no signals found: {"signals": []}
|
|
"""
|
|
|
|
|
|
async def extract_signals(messages_text: str, group_id: str, lens: str = "dev") -> list[Signal]:
    """
    Extract structured signals from a batch of formatted chat messages.

    Builds a system prompt from the lens-specific prompt plus the shared
    output-format instructions, sends it with the messages to the LLM,
    parses the JSON reply and converts each entry into a Signal.

    Args:
        messages_text: Formatted string like "[Alex]: Let's use Redis\\n[Bob]: Agreed"
        group_id: Telegram group ID
        lens: Active lens mode (dev, product, client, community). Any other
            value falls back to the "dev" prompt; the value is still stored
            unchanged on each Signal.

    Returns:
        List of Signal objects. Empty when the input is blank, when the LLM
        call or JSON parsing fails, or when no signal could be built.
        Failures are logged, never raised.
    """
    # Nothing to analyse: skip the LLM round-trip instead of prompting on empty input.
    if not messages_text or not messages_text.strip():
        return []

    system_prompt = LENS_PROMPTS.get(lens, LENS_PROMPTS["dev"])

    messages = [
        {"role": "system", "content": system_prompt + "\n\n" + EXTRACTION_FORMAT},
        {"role": "user", "content": f"Extract signals from these messages:\n\n{messages_text}"},
    ]

    try:
        result = await call_llm("fast_large", messages, temperature=0.2, max_tokens=2000)
        parsed = extract_json_object(result.get("content", ""))
        # `or []` also covers an explicit {"signals": null} reply.
        raw_signals = parsed.get("signals") or []

        # Convert to Signal objects
        signals = []
        for raw in raw_signals:
            # One malformed entry must not discard the rest of the batch.
            try:
                signal = Signal(
                    group_id=group_id,
                    lens=lens,
                    type=raw.get("type", "unknown"),
                    summary=raw.get("summary", ""),
                    entities=raw.get("entities", []),
                    severity=raw.get("severity", "low"),
                    status=raw.get("status", "unknown"),
                    raw_quote=raw.get("raw_quote", ""),
                    # NOTE(review): utcnow() is deprecated since Python 3.12; kept
                    # because an aware timestamp would change the stored ISO format
                    # and its consumers are not visible here.
                    timestamp=datetime.utcnow().isoformat(),
                )
            except Exception as e:
                logger.warning("Failed to parse signal: %s", e)
                continue
            signals.append(signal)

        # Use .get() so a response without a "provider" key cannot raise here
        # and throw away the signals that were already extracted.
        logger.info(
            "Extracted %s signals from %s (lens=%s) via %s",
            len(signals),
            group_id,
            lens,
            result.get("provider", "unknown"),
        )
        return signals

    except Exception as e:
        # Best-effort boundary: callers get an empty list rather than an exception.
        logger.error("Signal extraction failed: %s", e)
        return []
|