# NOTE: the lines below are file-listing metadata left over from extraction,
# preserved as a comment so this module remains valid Python.
# Captured 2026-04-05 00:43:23 +05:30 — 246 lines, 9.3 KiB, Python.
"""
Test Milestone 15: Meet transcript processing agent.
Tests signal extraction from transcript text WITHOUT needing the extension or Chrome.
"""
import asyncio
import contextlib
import os
import sys

# Make the project root importable so `backend.*` modules resolve when this
# test file is executed directly from its own directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# Sample meeting transcript (realistic, multi-topic)
# Chunk 1 is decision-heavy: an explicit decision (PostgreSQL), action items
# (Priya: schema by Thursday), a blocker (schema sign-off) and a risk (legacy
# MySQL data migration). Used by the chunk-1 and summary tests below.
SAMPLE_TRANSCRIPT_1 = """
Alex: Alright, let's get started. So the main thing today is the database migration.
Sam: Yeah, we've been going back and forth but I think we should just commit to PostgreSQL.
It has better support for our JSON query patterns and the team already knows it.
Alex: Agreed, let's make that the decision. We go with PostgreSQL.
Priya: I can set up the initial schema. I'll have it ready by Thursday.
Sam: Great. One thing though — the legacy MySQL tables still have some data we need to migrate.
I have no idea how long that's going to take. That's a real risk.
Alex: Who's owning the migration scripts?
Priya: I'll do it, but I'll need the final schema signed off before I start. That's a blocker for me.
Sam: When do we need the migration done by?
Alex: End of sprint, so March 28th.
Sam: Can we do that? I'm not sure.
Alex: We'll try. Priya, can you at least start the schema this week?
Priya: Yes, schema by Thursday, migration scripts next week if all goes well.
"""
# Chunk 2 is blocker/risk-heavy: dashboard blocked on design specs, checkout
# timeouts needing an owner (Sam), and an OAuth/auth-layer risk. Used by the
# chunk-2 and summary tests below.
SAMPLE_TRANSCRIPT_2 = """
Lisa: Moving on — the client dashboard is still blocked waiting on design specs.
Alex: Yeah that's been two weeks now. We literally cannot start without those specs.
Lisa: I know, I'll follow up with design today. That's on me.
Sam: Also, the checkout endpoint is still hitting intermittent timeouts.
Third time this sprint. We need to actually fix this, not just restart pods.
Alex: Agreed, that needs an owner. Sam can you pick that up?
Sam: Yeah I'll investigate this week. I'll add a ticket.
Lisa: Any risks before we close?
Priya: The OAuth integration is touching a lot of the auth layer.
If something breaks there, it could affect all our users at once. High risk.
Alex: Good call. Let's make sure we do that in a feature branch and have a rollback plan.
"""
async def test_signal_extraction_chunk_1():
    """Test extraction from a decision-heavy transcript.

    Feeds SAMPLE_TRANSCRIPT_1 through process_meet_chunk and asserts that:
    a raw-chunk signal is stored, at least one decision and one action item
    are extracted, and the stored signals are queryable from ChromaDB.
    Cleans up the test collection afterwards (best-effort).
    """
    # Project imports are local so the module can be imported without them.
    from backend.agents.meet_ingestor import process_meet_chunk
    from backend.db.chroma import query_signals
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_a"
    meeting_id = "sprint-planning-m15"
    print("Testing signal extraction from transcript chunk 1 (decisions + action items)...")
    signals = await process_meet_chunk(
        meeting_id=meeting_id,
        group_id=group_id,
        chunk_index=0,
        text=SAMPLE_TRANSCRIPT_1.strip(),
        speaker="Alex",
        timestamp="2026-03-21T10:00:00Z",
        is_final=False,
    )
    assert len(signals) > 0, "Expected at least some signals to be extracted"
    print(f"{len(signals)} total signals produced")
    types = [s["type"] for s in signals]
    print(f" Types found: {set(types)}")
    # Must have at least a raw chunk
    assert "meet_chunk_raw" in types, "Expected raw chunk signal"
    print(" ✅ Raw chunk stored (enables full-text search)")
    # Should have extracted decisions (PostgreSQL decision is clear)
    decisions = [s for s in signals if s["type"] == "meet_decision"]
    assert len(decisions) > 0, "Expected at least one decision (PostgreSQL decision is explicit)"
    print(f"{len(decisions)} decision signal(s) extracted")
    print(f" First decision: {decisions[0]['summary'][:100]}")
    # Should have extracted action items (Priya - schema by Thursday)
    actions = [s for s in signals if s["type"] == "meet_action_item"]
    assert len(actions) > 0, "Expected at least one action item (Priya - schema by Thursday)"
    print(f"{len(actions)} action item(s) extracted")
    print(f" First action: {actions[0]['summary'][:100]}")
    # Verify signals are in ChromaDB
    results = query_signals(group_id, "database decision PostgreSQL")
    assert len(results) > 0, "Expected signals to be queryable from ChromaDB"
    print(f" ✅ Signals queryable from ChromaDB ({len(results)} results for 'database decision PostgreSQL')")
    # Cleanup: best-effort — the collection may not exist if storage failed.
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    with contextlib.suppress(Exception):
        client.delete_collection(f"ll_{group_id}")
async def test_signal_extraction_chunk_2():
    """Test extraction from a blocker + risk heavy transcript.

    Feeds SAMPLE_TRANSCRIPT_2 through process_meet_chunk and asserts that at
    least one blocker and one risk signal are extracted. Cleans up the test
    collection afterwards (best-effort).
    """
    # Project imports are local so the module can be imported without them.
    from backend.agents.meet_ingestor import process_meet_chunk
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_b"
    meeting_id = "standup-m15"
    print("\nTesting signal extraction from transcript chunk 2 (blockers + risks)...")
    signals = await process_meet_chunk(
        meeting_id=meeting_id,
        group_id=group_id,
        chunk_index=0,
        text=SAMPLE_TRANSCRIPT_2.strip(),
        speaker="Lisa",
        timestamp="2026-03-21T10:30:00Z",
        is_final=False,
    )
    types = [s["type"] for s in signals]
    print(f" Types found: {set(types)}")
    blockers = [s for s in signals if s["type"] == "meet_blocker"]
    risks = [s for s in signals if s["type"] == "meet_risk"]
    assert len(blockers) > 0, "Expected at least one blocker (dashboard blocked on design specs)"
    print(f"{len(blockers)} blocker(s) extracted")
    print(f" First blocker: {blockers[0]['summary'][:100]}")
    assert len(risks) > 0, "Expected at least one risk (OAuth touching auth layer)"
    print(f"{len(risks)} risk(s) extracted")
    print(f" First risk: {risks[0]['summary'][:100]}")
    # Cleanup: best-effort — the collection may not exist if storage failed.
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    with contextlib.suppress(Exception):
        client.delete_collection(f"ll_{group_id}")
async def test_final_chunk_generates_summary():
    """Test that is_final=True triggers a summary signal generation.

    Sends two chunks for the same meeting_id — the second with is_final=True —
    and asserts the final call yields a meet_summary signal of substantive
    length. Cleans up the test collection afterwards (best-effort).
    """
    # Project imports are local so the module can be imported without them.
    from backend.agents.meet_ingestor import process_meet_chunk
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_c"
    meeting_id = "full-meeting-m15"
    print("\nTesting final chunk triggers meeting summary...")
    # First chunk
    await process_meet_chunk(
        meeting_id=meeting_id,
        group_id=group_id,
        chunk_index=0,
        text=SAMPLE_TRANSCRIPT_1.strip(),
        speaker="Alex",
        timestamp="2026-03-21T10:00:00Z",
        is_final=False,
    )
    # Final chunk
    signals = await process_meet_chunk(
        meeting_id=meeting_id,
        group_id=group_id,
        chunk_index=1,
        text=SAMPLE_TRANSCRIPT_2.strip(),
        speaker="Lisa",
        timestamp="2026-03-21T10:30:00Z",
        is_final=True,
    )
    types = [s["type"] for s in signals]
    assert "meet_summary" in types, "Expected a meet_summary signal on is_final=True"
    summary_sig = next(s for s in signals if s["type"] == "meet_summary")
    assert len(summary_sig["summary"]) > 50, "Summary should be at least 50 chars"
    print(f" ✅ Meeting summary generated ({len(summary_sig['summary'])} chars)")
    print(f" Preview: {summary_sig['summary'][:150]}...")
    # Cleanup: best-effort — the collection may not exist if storage failed.
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    with contextlib.suppress(Exception):
        client.delete_collection(f"ll_{group_id}")
async def test_signals_coexist_with_chat_signals():
    """Test that meet signals are queryable alongside existing chat signals.

    Stores chat-derived signals via the message pipeline and meet-derived
    signals via the meet ingestor into the same group, then asserts a single
    query_knowledge call returns a substantive answer drawing on the group.
    Cleans up the test collection afterwards (best-effort).
    """
    # Project imports are local so the module can be imported without them.
    from backend.agents.meet_ingestor import process_meet_chunk
    from backend.pipeline import process_message_batch, query_knowledge, set_lens
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_d"
    meeting_id = "integration-test-m15"
    set_lens(group_id, "dev")
    print("\nTesting meet signals + chat signals coexist...")
    # Add chat signals
    chat_messages = [
        {"sender": "Alex", "text": "The team agreed in a previous meeting we'd use Redis for caching.", "timestamp": "2026-03-20T09:00:00Z"},
        {"sender": "Priya", "text": "The timeout bug on checkout is still unresolved from last sprint.", "timestamp": "2026-03-20T09:05:00Z"},
    ]
    await process_message_batch(group_id, chat_messages)
    print(" ✅ Chat signals stored")
    # Add meet signals
    await process_meet_chunk(
        meeting_id=meeting_id,
        group_id=group_id,
        chunk_index=0,
        text="We decided in today's meeting to switch from Redis to Memcached for the caching layer. Sam will update the config by Friday.",
        speaker="Alex",
        timestamp="2026-03-21T10:00:00Z",
        is_final=False,
    )
    print(" ✅ Meet signals stored")
    # Query across both
    answer = await query_knowledge(group_id, "What did we decide about caching?")
    assert len(answer) > 20, "Expected a substantive answer about caching"
    print(f" ✅ Query across chat + meet: {answer[:120]}...")
    # Cleanup: best-effort — the collection may not exist if storage failed.
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    with contextlib.suppress(Exception):
        client.delete_collection(f"ll_{group_id}")
async def main():
    """Run all Milestone 15 tests sequentially and report success."""
    print("Running Milestone 15 tests...\n")
    await test_signal_extraction_chunk_1()
    await test_signal_extraction_chunk_2()
    await test_final_chunk_generates_summary()
    await test_signals_coexist_with_chat_signals()
    print("\n🎉 MILESTONE 15 PASSED — Meet transcript agent extracting and storing signals correctly")


# Guard the entry point: the original ran asyncio.run(main()) unconditionally,
# which executed the whole test suite as a side effect of importing this module.
if __name__ == "__main__":
    asyncio.run(main())