This commit is contained in:
2026-04-05 00:43:23 +05:30
commit 8be37d3e92
425 changed files with 101853 additions and 0 deletions

110
thirdeye/scripts/test_m2.py Normal file
View File

@@ -0,0 +1,110 @@
"""Test Milestone 2: ChromaDB + Embeddings working."""
import os, sys
# Put the parent of this script's directory at the front of sys.path so the
# `backend.*` imports used by the checks below resolve when the file is run
# directly (presumably that parent is the project root — confirm against the
# repository layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
def test_embeddings():
    """Smoke-test the embedding helpers.

    Embeds two sample sentences with ``embed_texts`` and a single query with
    ``embed_query``. Only the number of returned vectors and a minimum vector
    length (> 10) are asserted; the actual values are not inspected.

    Raises:
        AssertionError: if the count or either length check fails.
    """
    print("Testing embeddings...")
    from backend.db.embeddings import embed_texts, embed_query

    sample_texts = [
        "Let's use PostgreSQL for the database",
        "The timeout bug is happening again",
    ]
    vectors = embed_texts(sample_texts)
    assert len(vectors) == 2, f"Expected 2 embeddings, got {len(vectors)}"

    # Measured only after the count check so an empty result fails with the
    # assertion message above rather than an IndexError.
    first_dim = len(vectors[0])
    assert first_dim > 10, f"Embedding too short: {first_dim}"
    print(f" ✅ Embedded 2 texts, dimension={first_dim}")

    query_vector = embed_query("database decision")
    query_dim = len(query_vector)
    assert query_dim > 10
    print(f" ✅ Query embedding works, dimension={query_dim}")
def _drop_test_collection(test_group):
    """Best-effort removal of the Chroma collection created for *test_group*.

    A failed delete is reported on stdout instead of being raised, so cleanup
    problems never hide the outcome of the checks themselves.
    """
    import chromadb
    from backend.config import CHROMA_DB_PATH

    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    try:
        # NOTE(review): the "ll_" prefix presumably mirrors the collection
        # naming used inside backend.db.chroma — confirm against that module.
        client.delete_collection(f"ll_{test_group}")
    except Exception as exc:
        # Still best-effort, but no longer silent, and no longer swallowing
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` did.
        print(f" ⚠️ Could not clean up test collection: {exc!r}")
    else:
        print(" ✅ Cleaned up test collection")


def test_chroma():
    """Round-trip three sample signals through the Chroma helpers.

    Stores the signals under a dedicated test group, then checks:

    * a free-text query for ``"database decision"`` returns at least one hit
      whose ``"document"`` mentions "postgres" or "database";
    * a query filtered with ``signal_type="recurring_bug"`` returns hits;
    * ``get_all_signals`` returns at least the three stored signals.

    The test collection is removed afterwards even when an assertion fails,
    so a broken run does not leave data behind in the persistent store.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    print("Testing ChromaDB...")
    from backend.db.chroma import store_signals, query_signals, get_all_signals

    test_group = "test_group_m2"
    signals = [
        {
            "type": "architecture_decision",
            "summary": "Team decided to use PostgreSQL over MongoDB for relational data",
            "entities": ["@alex", "postgresql", "mongodb"],
            "severity": "medium",
            "status": "decided",
            "raw_quote": "Let's go with Postgres, MongoDB is overkill",
            "timestamp": "2026-03-20T10:00:00Z",
            "lens": "dev",
        },
        {
            "type": "tech_debt",
            "summary": "API URL hardcoded instead of using environment variables",
            "entities": ["@priya", "api_url"],
            "severity": "low",
            "status": "unresolved",
            "raw_quote": "Just hardcoding the URL for now",
            "timestamp": "2026-03-20T14:00:00Z",
            "lens": "dev",
        },
        {
            "type": "recurring_bug",
            "summary": "Timeout error occurring repeatedly in payment service",
            "entities": ["payment_service", "timeout"],
            "severity": "high",
            "status": "unresolved",
            "raw_quote": "Timeout error is back again",
            "timestamp": "2026-03-21T09:00:00Z",
            "lens": "dev",
        },
    ]

    try:
        # Store test signals
        store_signals(test_group, signals)
        print(f" ✅ Stored {len(signals)} signals")

        # Query
        results = query_signals(test_group, "database decision")
        assert len(results) > 0, "No results for 'database decision'"
        top_document = results[0]["document"]
        top_lowered = top_document.lower()
        assert "postgres" in top_lowered or "database" in top_lowered, (
            f"Top result is unrelated to the query: {top_document[:80]!r}"
        )
        print(f" ✅ Query 'database decision' returned {len(results)} results")
        print(f" Top result: {top_document[:80]}")

        # Query with type filter
        results2 = query_signals(test_group, "bug", signal_type="recurring_bug")
        assert len(results2) > 0, "No results for type=recurring_bug"
        print(f" ✅ Filtered query (type=recurring_bug) returned {len(results2)} results")

        # Get all. NOTE(review): ">=" rather than "==" presumably tolerates
        # leftovers from an earlier interrupted run — confirm.
        all_sigs = get_all_signals(test_group)
        assert len(all_sigs) >= 3, f"Expected >=3 signals, got {len(all_sigs)}"
        print(f" ✅ get_all_signals returned {len(all_sigs)} signals")
    finally:
        # Cleanup test collection, on success and on failure alike.
        _drop_test_collection(test_group)
def test_models():
    """Instantiate each data model once and check its generated/default fields.

    Builds a ``Signal``, a ``Pattern`` and a ``CrossGroupInsight`` with the
    minimal keyword arguments used here and asserts on ``id``, ``severity``,
    ``is_active`` and ``is_resolved`` respectively.

    Raises:
        AssertionError: if any of the checked field values differs.
    """
    print("Testing data models...")
    from backend.db.models import Signal, Pattern, CrossGroupInsight

    signal = Signal(group_id="test", type="tech_debt", summary="Test signal")
    assert signal.id is not None
    assert signal.severity == "low"
    id_prefix = signal.id[:8]
    print(f" ✅ Signal model works (id={id_prefix}...)")

    pattern = Pattern(
        group_id="test",
        type="frequency_spike",
        description="Test pattern",
    )
    # Equality (not identity) comparison kept on purpose: same check as before.
    assert pattern.is_active == True  # noqa: E712
    print(" ✅ Pattern model works")

    insight = CrossGroupInsight(type="blocked_handoff", description="Test insight")
    assert insight.is_resolved == False  # noqa: E712
    print(" ✅ CrossGroupInsight model works")
# Run every milestone check in order; the first failing assertion aborts the
# script before the success banner is printed.
for _check in (test_embeddings, test_chroma, test_models):
    _check()
print("\n🎉 MILESTONE 2 PASSED — ChromaDB + Embeddings working")