Files
B.Tech-Project-III/thirdeye/scripts/clear_db_advanced.py
2026-04-05 00:43:23 +05:30

195 lines
6.6 KiB
Python

"""
Advanced ChromaDB clear script with selective deletion options.

Usage:
    python scripts/clear_db_advanced.py                        # Interactive - clear all
    python scripts/clear_db_advanced.py --force                # Clear all without confirmation
    python scripts/clear_db_advanced.py --group meet-sessions  # Clear specific group only
    python scripts/clear_db_advanced.py --meet-only            # Clear collections containing meet signals
    python scripts/clear_db_advanced.py --test-only            # Clear only test collections
"""
import sys
import os
import argparse
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import chromadb
from backend.config import CHROMA_DB_PATH
from backend.db.chroma import get_group_ids
def clear_all(force=False):
    """Remove every collection from the ChromaDB store at CHROMA_DB_PATH.

    Prints each collection with its document count, asks the user to type
    'yes' unless ``force`` is true, then deletes the collections one by one.
    """
    db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    found = db.list_collections()

    # Nothing stored: report and leave before any prompt is shown.
    if not found:
        print("✅ Database is already empty")
        return

    n_collections = len(found)
    print(f"Found {n_collections} collection(s) to delete:")

    doc_total = 0
    for entry in found:
        size = entry.count()
        print(f" - {entry.name}: {size} documents")
        doc_total += size
    print(f"\nTotal: {doc_total} documents across {n_collections} collections")

    # The prompt is only evaluated when confirmation has not been waived.
    if not force and input("\nType 'yes' to confirm deletion: ").lower() != 'yes':
        print("❌ Deletion cancelled")
        return

    print("\n🗑️ Deleting...")
    for entry in found:
        db.delete_collection(entry.name)
        print(f" ✅ Deleted: {entry.name}")
    print(f"\n✅ Deleted {n_collections} collections, {doc_total} documents")
def clear_group(group_id: str, force=False):
    """Remove the single collection that belongs to ``group_id``.

    The collection name is ``ll_`` followed by the group id with hyphens
    turned into underscores, cut to at most 63 characters. Any exception
    raised while looking up, counting, confirming or deleting is caught and
    printed as one "not found or error" message.
    """
    db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collection_name = ("ll_" + group_id.replace('-', '_'))[:63]

    try:
        doc_count = db.get_collection(collection_name).count()
        print(f"Found collection '{collection_name}' with {doc_count} documents")

        # With force set, the prompt is never shown (short-circuit).
        confirmed = force or input(
            f"\nDelete collection '{collection_name}'? Type 'yes' to confirm: "
        ).lower() == 'yes'
        if not confirmed:
            print("❌ Deletion cancelled")
            return

        db.delete_collection(collection_name)
        print(f"✅ Deleted collection '{collection_name}' ({doc_count} documents)")
    except Exception as err:
        print(f"❌ Collection '{collection_name}' not found or error: {err}")
def clear_meet_only(force=False):
    """Delete every collection that appears to hold meet signals.

    A collection is treated as meet-related when at least one of the first
    five records returned by ``get(limit=5)`` has ``lens == "meet"`` in its
    metadata. Matching collections are deleted in full, including any
    non-meet documents they contain.

    Args:
        force: When true, skip the interactive 'yes' confirmation.
    """
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collections = client.list_collections()

    # Identify meet collections (have lens=meet signals)
    meet_collections = []
    for coll in collections:
        try:
            # Sample first few docs to check if they're meet-related
            results = coll.get(limit=5)
        except Exception as exc:
            # Best-effort scan: an unreadable collection is skipped, but the
            # skip is reported instead of being silently swallowed.
            print(f" ⚠️ Skipping {getattr(coll, 'name', coll)}: could not inspect ({exc})")
            continue

        metadatas = (results or {}).get("metadatas") or []
        # Records stored without metadata come back as None entries; they
        # must not abort the check of the remaining sampled records.
        if any(meta and meta.get("lens") == "meet" for meta in metadatas):
            meet_collections.append(coll)

    if not meet_collections:
        print("✅ No meet-related collections found")
        return

    print(f"Found {len(meet_collections)} meet-related collection(s):")
    total_docs = 0
    for coll in meet_collections:
        count = coll.count()
        total_docs += count
        print(f" - {coll.name}: {count} documents")

    if not force:
        response = input("\nType 'yes' to confirm deletion: ")
        if response.lower() != 'yes':
            print("❌ Deletion cancelled")
            return

    print("\n🗑️ Deleting meet collections...")
    for coll in meet_collections:
        client.delete_collection(coll.name)
        print(f" ✅ Deleted: {coll.name}")
    print(f"\n✅ Deleted {len(meet_collections)} collections, {total_docs} documents")
def clear_test_only(force=False):
    """Delete only test collections (names starting with 'test' or 'll_test').

    Matching is case-insensitive and prefix-based, so a collection that
    merely contains the letters "test" somewhere in its name (for example
    inside "latest") is not selected for deletion.

    Args:
        force: When true, skip the interactive 'yes' confirmation.
    """
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collections = client.list_collections()

    # Prefix match, as documented; a substring match would over-delete.
    test_prefixes = ("test", "ll_test")
    test_collections = [
        c for c in collections if c.name.lower().startswith(test_prefixes)
    ]

    if not test_collections:
        print("✅ No test collections found")
        return

    print(f"Found {len(test_collections)} test collection(s):")
    total_docs = 0
    for coll in test_collections:
        count = coll.count()
        total_docs += count
        print(f" - {coll.name}: {count} documents")

    if not force:
        response = input("\nType 'yes' to confirm deletion: ")
        if response.lower() != 'yes':
            print("❌ Deletion cancelled")
            return

    print("\n🗑️ Deleting test collections...")
    for coll in test_collections:
        client.delete_collection(coll.name)
        print(f" ✅ Deleted: {coll.name}")
    print(f"\n✅ Deleted {len(test_collections)} collections, {total_docs} documents")
def main():
    """Parse the command line and run exactly one clear operation.

    Precedence when several selectors are given: ``--group`` first, then
    ``--meet-only``, then ``--test-only``; with none of them, everything is
    cleared. ``--force`` is forwarded to whichever operation runs.
    """
    usage_examples = """
Examples:
python scripts/clear_db_advanced.py # Clear all (interactive)
python scripts/clear_db_advanced.py --force # Clear all (no confirmation)
python scripts/clear_db_advanced.py --group acme-dev # Clear specific group
python scripts/clear_db_advanced.py --meet-only # Clear only meet signals
python scripts/clear_db_advanced.py --test-only --force # Clear test data automatically
"""
    cli = argparse.ArgumentParser(
        description="Clear ChromaDB collections (signals + embeddings)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("--force", action="store_true", help="Skip confirmation prompt")
    cli.add_argument("--group", type=str, help="Delete only this specific group")
    cli.add_argument(
        "--meet-only", action="store_true", help="Delete only meet-related collections"
    )
    cli.add_argument(
        "--test-only", action="store_true", help="Delete only test collections"
    )
    options = cli.parse_args()

    banner = "=" * 60
    print(banner)
    print("ChromaDB Advanced Clear Script")
    print(banner)
    print(f"Database path: {CHROMA_DB_PATH}")
    print()

    # Pick the operation and its positional arguments, then call it once.
    if options.group:
        operation, positional = clear_group, (options.group,)
    elif options.meet_only:
        operation, positional = clear_meet_only, ()
    elif options.test_only:
        operation, positional = clear_test_only, ()
    else:
        operation, positional = clear_all, ()
    operation(*positional, force=options.force)

    print("\n" + banner)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()