"""Test Milestone 2: ChromaDB + Embeddings working.""" import os, sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) def test_embeddings(): print("Testing embeddings...") from backend.db.embeddings import embed_texts, embed_query texts = ["Let's use PostgreSQL for the database", "The timeout bug is happening again"] embeddings = embed_texts(texts) assert len(embeddings) == 2, f"Expected 2 embeddings, got {len(embeddings)}" assert len(embeddings[0]) > 10, f"Embedding too short: {len(embeddings[0])}" print(f" ✅ Embedded 2 texts, dimension={len(embeddings[0])}") query_emb = embed_query("database decision") assert len(query_emb) > 10 print(f" ✅ Query embedding works, dimension={len(query_emb)}") def test_chroma(): print("Testing ChromaDB...") from backend.db.chroma import store_signals, query_signals, get_all_signals test_group = "test_group_m2" # Store test signals signals = [ { "type": "architecture_decision", "summary": "Team decided to use PostgreSQL over MongoDB for relational data", "entities": ["@alex", "postgresql", "mongodb"], "severity": "medium", "status": "decided", "raw_quote": "Let's go with Postgres, MongoDB is overkill", "timestamp": "2026-03-20T10:00:00Z", "lens": "dev", }, { "type": "tech_debt", "summary": "API URL hardcoded instead of using environment variables", "entities": ["@priya", "api_url"], "severity": "low", "status": "unresolved", "raw_quote": "Just hardcoding the URL for now", "timestamp": "2026-03-20T14:00:00Z", "lens": "dev", }, { "type": "recurring_bug", "summary": "Timeout error occurring repeatedly in payment service", "entities": ["payment_service", "timeout"], "severity": "high", "status": "unresolved", "raw_quote": "Timeout error is back again", "timestamp": "2026-03-21T09:00:00Z", "lens": "dev", }, ] store_signals(test_group, signals) print(f" ✅ Stored {len(signals)} signals") # Query results = query_signals(test_group, "database decision") assert len(results) > 0, "No results for 'database decision'" assert "postgres" in results[0]["document"].lower() or "database" in results[0]["document"].lower() print(f" ✅ Query 'database decision' returned {len(results)} results") print(f" Top result: {results[0]['document'][:80]}") # Query with type filter results2 = query_signals(test_group, "bug", signal_type="recurring_bug") assert len(results2) > 0, "No results for type=recurring_bug" print(f" ✅ Filtered query (type=recurring_bug) returned {len(results2)} results") # Get all all_sigs = get_all_signals(test_group) assert len(all_sigs) >= 3, f"Expected >=3 signals, got {len(all_sigs)}" print(f" ✅ get_all_signals returned {len(all_sigs)} signals") # Cleanup test collection import chromadb from backend.config import CHROMA_DB_PATH client = chromadb.PersistentClient(path=CHROMA_DB_PATH) try: client.delete_collection(f"ll_{test_group}") print(f" ✅ Cleaned up test collection") except: pass def test_models(): print("Testing data models...") from backend.db.models import Signal, Pattern, CrossGroupInsight s = Signal(group_id="test", type="tech_debt", summary="Test signal") assert s.id is not None assert s.severity == "low" print(f" ✅ Signal model works (id={s.id[:8]}...)") p = Pattern(group_id="test", type="frequency_spike", description="Test pattern") assert p.is_active == True print(f" ✅ Pattern model works") c = CrossGroupInsight(type="blocked_handoff", description="Test insight") assert c.is_resolved == False print(f" ✅ CrossGroupInsight model works") test_embeddings() test_chroma() test_models() print("\n🎉 MILESTONE 2 PASSED — ChromaDB + Embeddings working")