"""
|
|
Test Milestone 15: Meet transcript processing agent.
|
|
Tests signal extraction from transcript text WITHOUT needing the extension or Chrome.
|
|
"""
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))


# Sample meeting transcript (realistic, multi-topic): explicit decision,
# dated action items, a blocker, and a schedule risk.
SAMPLE_TRANSCRIPT_1 = """
Alex: Alright, let's get started. So the main thing today is the database migration.
Sam: Yeah, we've been going back and forth but I think we should just commit to PostgreSQL.
It has better support for our JSON query patterns and the team already knows it.
Alex: Agreed, let's make that the decision. We go with PostgreSQL.
Priya: I can set up the initial schema. I'll have it ready by Thursday.
Sam: Great. One thing though — the legacy MySQL tables still have some data we need to migrate.
I have no idea how long that's going to take. That's a real risk.
Alex: Who's owning the migration scripts?
Priya: I'll do it, but I'll need the final schema signed off before I start. That's a blocker for me.
Sam: When do we need the migration done by?
Alex: End of sprint, so March 28th.
Sam: Can we do that? I'm not sure.
Alex: We'll try. Priya, can you at least start the schema this week?
Priya: Yes, schema by Thursday, migration scripts next week if all goes well.
"""
SAMPLE_TRANSCRIPT_2 = """
|
|
Lisa: Moving on — the client dashboard is still blocked waiting on design specs.
|
|
Alex: Yeah that's been two weeks now. We literally cannot start without those specs.
|
|
Lisa: I know, I'll follow up with design today. That's on me.
|
|
Sam: Also, the checkout endpoint is still hitting intermittent timeouts.
|
|
Third time this sprint. We need to actually fix this, not just restart pods.
|
|
Alex: Agreed, that needs an owner. Sam can you pick that up?
|
|
Sam: Yeah I'll investigate this week. I'll add a ticket.
|
|
Lisa: Any risks before we close?
|
|
Priya: The OAuth integration is touching a lot of the auth layer.
|
|
If something breaks there, it could affect all our users at once. High risk.
|
|
Alex: Good call. Let's make sure we do that in a feature branch and have a rollback plan.
|
|
"""


async def test_signal_extraction_chunk_1():
    """Test extraction from a decision-heavy transcript.

    Feeds SAMPLE_TRANSCRIPT_1 through process_meet_chunk and asserts that a
    raw chunk, at least one decision, and at least one action item are
    produced, and that the stored signals are queryable from ChromaDB.
    """
    from backend.agents.meet_ingestor import process_meet_chunk
    from backend.db.chroma import query_signals
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_a"
    meeting_id = "sprint-planning-m15"

    print("Testing signal extraction from transcript chunk 1 (decisions + action items)...")
    try:
        signals = await process_meet_chunk(
            meeting_id=meeting_id,
            group_id=group_id,
            chunk_index=0,
            text=SAMPLE_TRANSCRIPT_1.strip(),
            speaker="Alex",
            timestamp="2026-03-21T10:00:00Z",
            is_final=False,
        )

        assert len(signals) > 0, "Expected at least some signals to be extracted"
        print(f" ✅ {len(signals)} total signals produced")

        types = [s["type"] for s in signals]
        print(f" Types found: {set(types)}")

        # Must have at least a raw chunk
        assert "meet_chunk_raw" in types, "Expected raw chunk signal"
        print(" ✅ Raw chunk stored (enables full-text search)")

        # Should have extracted decisions (PostgreSQL decision is clear)
        decisions = [s for s in signals if s["type"] == "meet_decision"]
        assert len(decisions) > 0, "Expected at least one decision (PostgreSQL decision is explicit)"
        print(f" ✅ {len(decisions)} decision signal(s) extracted")
        print(f" First decision: {decisions[0]['summary'][:100]}")

        # Should have extracted action items (Priya - schema by Thursday)
        actions = [s for s in signals if s["type"] == "meet_action_item"]
        assert len(actions) > 0, "Expected at least one action item (Priya - schema by Thursday)"
        print(f" ✅ {len(actions)} action item(s) extracted")
        print(f" First action: {actions[0]['summary'][:100]}")

        # Verify signals are in ChromaDB
        results = query_signals(group_id, "database decision PostgreSQL")
        assert len(results) > 0, "Expected signals to be queryable from ChromaDB"
        print(f" ✅ Signals queryable from ChromaDB ({len(results)} results for 'database decision PostgreSQL')")
    finally:
        # Cleanup in finally so a failed assertion does not leave a stale
        # test collection behind for the next run.
        client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
        try:
            client.delete_collection(f"ll_{group_id}")
        except Exception:
            pass


async def test_signal_extraction_chunk_2():
    """Test extraction from a blocker + risk heavy transcript.

    Feeds SAMPLE_TRANSCRIPT_2 through process_meet_chunk and asserts that
    at least one blocker and one risk signal are produced.
    """
    from backend.agents.meet_ingestor import process_meet_chunk
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_b"
    meeting_id = "standup-m15"

    print("\nTesting signal extraction from transcript chunk 2 (blockers + risks)...")
    try:
        signals = await process_meet_chunk(
            meeting_id=meeting_id,
            group_id=group_id,
            chunk_index=0,
            text=SAMPLE_TRANSCRIPT_2.strip(),
            speaker="Lisa",
            timestamp="2026-03-21T10:30:00Z",
            is_final=False,
        )

        types = [s["type"] for s in signals]
        print(f" Types found: {set(types)}")

        blockers = [s for s in signals if s["type"] == "meet_blocker"]
        risks = [s for s in signals if s["type"] == "meet_risk"]

        assert len(blockers) > 0, "Expected at least one blocker (dashboard blocked on design specs)"
        print(f" ✅ {len(blockers)} blocker(s) extracted")
        print(f" First blocker: {blockers[0]['summary'][:100]}")

        assert len(risks) > 0, "Expected at least one risk (OAuth touching auth layer)"
        print(f" ✅ {len(risks)} risk(s) extracted")
        print(f" First risk: {risks[0]['summary'][:100]}")
    finally:
        # Cleanup in finally so a failed assertion does not leave a stale
        # test collection behind.
        client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
        try:
            client.delete_collection(f"ll_{group_id}")
        except Exception:
            pass


async def test_final_chunk_generates_summary():
    """Test that is_final=True triggers a summary signal generation.

    Processes two chunks for one meeting; the second has is_final=True and
    must yield a `meet_summary` signal of substantive length.
    """
    from backend.agents.meet_ingestor import process_meet_chunk
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_c"
    meeting_id = "full-meeting-m15"

    print("\nTesting final chunk triggers meeting summary...")
    try:
        # First chunk
        await process_meet_chunk(
            meeting_id=meeting_id,
            group_id=group_id,
            chunk_index=0,
            text=SAMPLE_TRANSCRIPT_1.strip(),
            speaker="Alex",
            timestamp="2026-03-21T10:00:00Z",
            is_final=False,
        )

        # Final chunk
        signals = await process_meet_chunk(
            meeting_id=meeting_id,
            group_id=group_id,
            chunk_index=1,
            text=SAMPLE_TRANSCRIPT_2.strip(),
            speaker="Lisa",
            timestamp="2026-03-21T10:30:00Z",
            is_final=True,
        )

        types = [s["type"] for s in signals]
        assert "meet_summary" in types, "Expected a meet_summary signal on is_final=True"
        summary_sig = next(s for s in signals if s["type"] == "meet_summary")
        assert len(summary_sig["summary"]) > 50, "Summary should be at least 50 chars"
        print(f" ✅ Meeting summary generated ({len(summary_sig['summary'])} chars)")
        print(f" Preview: {summary_sig['summary'][:150]}...")
    finally:
        # Cleanup in finally so a failed assertion does not leave a stale
        # test collection behind.
        client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
        try:
            client.delete_collection(f"ll_{group_id}")
        except Exception:
            pass


async def test_signals_coexist_with_chat_signals():
    """Test that meet signals are queryable alongside existing chat signals.

    Stores chat-derived signals and meet-derived signals in the same group,
    then checks a single knowledge query can draw on both.
    """
    from backend.agents.meet_ingestor import process_meet_chunk
    from backend.pipeline import process_message_batch, query_knowledge, set_lens
    import chromadb
    from backend.config import CHROMA_DB_PATH

    group_id = "test_meet_m15_d"
    meeting_id = "integration-test-m15"
    set_lens(group_id, "dev")

    print("\nTesting meet signals + chat signals coexist...")
    try:
        # Add chat signals
        chat_messages = [
            {"sender": "Alex", "text": "The team agreed in a previous meeting we'd use Redis for caching.", "timestamp": "2026-03-20T09:00:00Z"},
            {"sender": "Priya", "text": "The timeout bug on checkout is still unresolved from last sprint.", "timestamp": "2026-03-20T09:05:00Z"},
        ]
        await process_message_batch(group_id, chat_messages)
        print(" ✅ Chat signals stored")

        # Add meet signals
        await process_meet_chunk(
            meeting_id=meeting_id,
            group_id=group_id,
            chunk_index=0,
            text="We decided in today's meeting to switch from Redis to Memcached for the caching layer. Sam will update the config by Friday.",
            speaker="Alex",
            timestamp="2026-03-21T10:00:00Z",
            is_final=False,
        )
        print(" ✅ Meet signals stored")

        # Query across both
        answer = await query_knowledge(group_id, "What did we decide about caching?")
        assert len(answer) > 20, "Expected a substantive answer about caching"
        print(f" ✅ Query across chat + meet: {answer[:120]}...")
    finally:
        # Cleanup in finally so a failed assertion does not leave a stale
        # test collection behind.
        client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
        try:
            client.delete_collection(f"ll_{group_id}")
        except Exception:
            pass


async def main():
    """Run all Milestone 15 tests sequentially; any failure aborts the run."""
    print("Running Milestone 15 tests...\n")
    await test_signal_extraction_chunk_1()
    await test_signal_extraction_chunk_2()
    await test_final_chunk_generates_summary()
    await test_signals_coexist_with_chat_signals()
    print("\n🎉 MILESTONE 15 PASSED — Meet transcript agent extracting and storing signals correctly")


# Guard the entry point so importing this module does not run the suite.
if __name__ == "__main__":
    asyncio.run(main())