""" Test Milestone 15: Meet transcript processing agent. Tests signal extraction from transcript text WITHOUT needing the extension or Chrome. """ import asyncio import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # Sample meeting transcript (realistic, multi-topic) SAMPLE_TRANSCRIPT_1 = """ Alex: Alright, let's get started. So the main thing today is the database migration. Sam: Yeah, we've been going back and forth but I think we should just commit to PostgreSQL. It has better support for our JSON query patterns and the team already knows it. Alex: Agreed, let's make that the decision. We go with PostgreSQL. Priya: I can set up the initial schema. I'll have it ready by Thursday. Sam: Great. One thing though — the legacy MySQL tables still have some data we need to migrate. I have no idea how long that's going to take. That's a real risk. Alex: Who's owning the migration scripts? Priya: I'll do it, but I'll need the final schema signed off before I start. That's a blocker for me. Sam: When do we need the migration done by? Alex: End of sprint, so March 28th. Sam: Can we do that? I'm not sure. Alex: We'll try. Priya, can you at least start the schema this week? Priya: Yes, schema by Thursday, migration scripts next week if all goes well. """ SAMPLE_TRANSCRIPT_2 = """ Lisa: Moving on — the client dashboard is still blocked waiting on design specs. Alex: Yeah that's been two weeks now. We literally cannot start without those specs. Lisa: I know, I'll follow up with design today. That's on me. Sam: Also, the checkout endpoint is still hitting intermittent timeouts. Third time this sprint. We need to actually fix this, not just restart pods. Alex: Agreed, that needs an owner. Sam can you pick that up? Sam: Yeah I'll investigate this week. I'll add a ticket. Lisa: Any risks before we close? Priya: The OAuth integration is touching a lot of the auth layer. If something breaks there, it could affect all our users at once. High risk. Alex: Good call. Let's make sure we do that in a feature branch and have a rollback plan. """ async def test_signal_extraction_chunk_1(): """Test extraction from a decision-heavy transcript.""" from backend.agents.meet_ingestor import process_meet_chunk from backend.db.chroma import query_signals import chromadb from backend.config import CHROMA_DB_PATH group_id = "test_meet_m15_a" meeting_id = "sprint-planning-m15" print("Testing signal extraction from transcript chunk 1 (decisions + action items)...") signals = await process_meet_chunk( meeting_id=meeting_id, group_id=group_id, chunk_index=0, text=SAMPLE_TRANSCRIPT_1.strip(), speaker="Alex", timestamp="2026-03-21T10:00:00Z", is_final=False, ) assert len(signals) > 0, "Expected at least some signals to be extracted" print(f" ✅ {len(signals)} total signals produced") types = [s["type"] for s in signals] print(f" Types found: {set(types)}") # Must have at least a raw chunk assert "meet_chunk_raw" in types, "Expected raw chunk signal" print(" ✅ Raw chunk stored (enables full-text search)") # Should have extracted decisions (PostgreSQL decision is clear) decisions = [s for s in signals if s["type"] == "meet_decision"] assert len(decisions) > 0, "Expected at least one decision (PostgreSQL decision is explicit)" print(f" ✅ {len(decisions)} decision signal(s) extracted") print(f" First decision: {decisions[0]['summary'][:100]}") # Should have extracted action items (Priya - schema by Thursday) actions = [s for s in signals if s["type"] == "meet_action_item"] assert len(actions) > 0, "Expected at least one action item (Priya - schema by Thursday)" print(f" ✅ {len(actions)} action item(s) extracted") print(f" First action: {actions[0]['summary'][:100]}") # Verify signals are in ChromaDB results = query_signals(group_id, "database decision PostgreSQL") assert len(results) > 0, "Expected signals to be queryable from ChromaDB" print(f" ✅ Signals queryable from ChromaDB ({len(results)} results for 'database decision PostgreSQL')") # Cleanup client = chromadb.PersistentClient(path=CHROMA_DB_PATH) try: client.delete_collection(f"ll_{group_id}") except Exception: pass async def test_signal_extraction_chunk_2(): """Test extraction from a blocker + risk heavy transcript.""" from backend.agents.meet_ingestor import process_meet_chunk import chromadb from backend.config import CHROMA_DB_PATH group_id = "test_meet_m15_b" meeting_id = "standup-m15" print("\nTesting signal extraction from transcript chunk 2 (blockers + risks)...") signals = await process_meet_chunk( meeting_id=meeting_id, group_id=group_id, chunk_index=0, text=SAMPLE_TRANSCRIPT_2.strip(), speaker="Lisa", timestamp="2026-03-21T10:30:00Z", is_final=False, ) types = [s["type"] for s in signals] print(f" Types found: {set(types)}") blockers = [s for s in signals if s["type"] == "meet_blocker"] risks = [s for s in signals if s["type"] == "meet_risk"] assert len(blockers) > 0, "Expected at least one blocker (dashboard blocked on design specs)" print(f" ✅ {len(blockers)} blocker(s) extracted") print(f" First blocker: {blockers[0]['summary'][:100]}") assert len(risks) > 0, "Expected at least one risk (OAuth touching auth layer)" print(f" ✅ {len(risks)} risk(s) extracted") print(f" First risk: {risks[0]['summary'][:100]}") # Cleanup client = chromadb.PersistentClient(path=CHROMA_DB_PATH) try: client.delete_collection(f"ll_{group_id}") except Exception: pass async def test_final_chunk_generates_summary(): """Test that is_final=True triggers a summary signal generation.""" from backend.agents.meet_ingestor import process_meet_chunk import chromadb from backend.config import CHROMA_DB_PATH group_id = "test_meet_m15_c" meeting_id = "full-meeting-m15" print("\nTesting final chunk triggers meeting summary...") # First chunk await process_meet_chunk( meeting_id=meeting_id, group_id=group_id, chunk_index=0, text=SAMPLE_TRANSCRIPT_1.strip(), speaker="Alex", timestamp="2026-03-21T10:00:00Z", is_final=False, ) # Final chunk signals = await process_meet_chunk( meeting_id=meeting_id, group_id=group_id, chunk_index=1, text=SAMPLE_TRANSCRIPT_2.strip(), speaker="Lisa", timestamp="2026-03-21T10:30:00Z", is_final=True, ) types = [s["type"] for s in signals] assert "meet_summary" in types, "Expected a meet_summary signal on is_final=True" summary_sig = next(s for s in signals if s["type"] == "meet_summary") assert len(summary_sig["summary"]) > 50, "Summary should be at least 50 chars" print(f" ✅ Meeting summary generated ({len(summary_sig['summary'])} chars)") print(f" Preview: {summary_sig['summary'][:150]}...") # Cleanup client = chromadb.PersistentClient(path=CHROMA_DB_PATH) try: client.delete_collection(f"ll_{group_id}") except Exception: pass async def test_signals_coexist_with_chat_signals(): """Test that meet signals are queryable alongside existing chat signals.""" from backend.agents.meet_ingestor import process_meet_chunk from backend.pipeline import process_message_batch, query_knowledge, set_lens import chromadb from backend.config import CHROMA_DB_PATH group_id = "test_meet_m15_d" meeting_id = "integration-test-m15" set_lens(group_id, "dev") print("\nTesting meet signals + chat signals coexist...") # Add chat signals chat_messages = [ {"sender": "Alex", "text": "The team agreed in a previous meeting we'd use Redis for caching.", "timestamp": "2026-03-20T09:00:00Z"}, {"sender": "Priya", "text": "The timeout bug on checkout is still unresolved from last sprint.", "timestamp": "2026-03-20T09:05:00Z"}, ] await process_message_batch(group_id, chat_messages) print(" ✅ Chat signals stored") # Add meet signals await process_meet_chunk( meeting_id=meeting_id, group_id=group_id, chunk_index=0, text="We decided in today's meeting to switch from Redis to Memcached for the caching layer. Sam will update the config by Friday.", speaker="Alex", timestamp="2026-03-21T10:00:00Z", is_final=False, ) print(" ✅ Meet signals stored") # Query across both answer = await query_knowledge(group_id, "What did we decide about caching?") assert len(answer) > 20, "Expected a substantive answer about caching" print(f" ✅ Query across chat + meet: {answer[:120]}...") # Cleanup client = chromadb.PersistentClient(path=CHROMA_DB_PATH) try: client.delete_collection(f"ll_{group_id}") except Exception: pass async def main(): print("Running Milestone 15 tests...\n") await test_signal_extraction_chunk_1() await test_signal_extraction_chunk_2() await test_final_chunk_generates_summary() await test_signals_coexist_with_chat_signals() print("\n🎉 MILESTONE 15 PASSED — Meet transcript agent extracting and storing signals correctly") asyncio.run(main())