""" Advanced ChromaDB clear script with selective deletion options. Usage: python scripts/clear_db_advanced.py # Interactive - clear all python scripts/clear_db_advanced.py --force # Clear all without confirmation python scripts/clear_db_advanced.py --group meet-sessions # Clear specific group only python scripts/clear_db_advanced.py --meet-only # Clear only meet-related signals python scripts/clear_db_advanced.py --test-only # Clear only test collections """ import sys import os import argparse sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) import chromadb from backend.config import CHROMA_DB_PATH from backend.db.chroma import get_group_ids def clear_all(force=False): """Delete all collections.""" client = chromadb.PersistentClient(path=CHROMA_DB_PATH) collections = client.list_collections() if not collections: print("āœ… Database is already empty") return print(f"Found {len(collections)} collection(s) to delete:") total_docs = 0 for coll in collections: count = coll.count() total_docs += count print(f" - {coll.name}: {count} documents") print(f"\nTotal: {total_docs} documents across {len(collections)} collections") if not force: response = input("\nType 'yes' to confirm deletion: ") if response.lower() != 'yes': print("āŒ Deletion cancelled") return print("\nšŸ—‘ļø Deleting...") for coll in collections: client.delete_collection(coll.name) print(f" āœ… Deleted: {coll.name}") print(f"\nāœ… Deleted {len(collections)} collections, {total_docs} documents") def clear_group(group_id: str, force=False): """Delete a specific group's collection.""" client = chromadb.PersistentClient(path=CHROMA_DB_PATH) safe_name = f"ll_{group_id.replace('-', '_')}"[:63] try: coll = client.get_collection(safe_name) count = coll.count() print(f"Found collection '{safe_name}' with {count} documents") if not force: response = input(f"\nDelete collection '{safe_name}'? Type 'yes' to confirm: ") if response.lower() != 'yes': print("āŒ Deletion cancelled") return client.delete_collection(safe_name) print(f"āœ… Deleted collection '{safe_name}' ({count} documents)") except Exception as e: print(f"āŒ Collection '{safe_name}' not found or error: {e}") def clear_meet_only(force=False): """Delete only meet-related collections.""" client = chromadb.PersistentClient(path=CHROMA_DB_PATH) collections = client.list_collections() # Identify meet collections (have lens=meet signals) meet_collections = [] for coll in collections: try: # Sample first few docs to check if they're meet-related results = coll.get(limit=5) if results and results.get("metadatas"): for meta in results["metadatas"]: if meta.get("lens") == "meet": meet_collections.append(coll) break except Exception: pass if not meet_collections: print("āœ… No meet-related collections found") return print(f"Found {len(meet_collections)} meet-related collection(s):") total_docs = 0 for coll in meet_collections: count = coll.count() total_docs += count print(f" - {coll.name}: {count} documents") if not force: response = input("\nType 'yes' to confirm deletion: ") if response.lower() != 'yes': print("āŒ Deletion cancelled") return print("\nšŸ—‘ļø Deleting meet collections...") for coll in meet_collections: client.delete_collection(coll.name) print(f" āœ… Deleted: {coll.name}") print(f"\nāœ… Deleted {len(meet_collections)} collections, {total_docs} documents") def clear_test_only(force=False): """Delete only test collections (names starting with 'test' or 'll_test').""" client = chromadb.PersistentClient(path=CHROMA_DB_PATH) collections = client.list_collections() test_collections = [c for c in collections if 'test' in c.name.lower()] if not test_collections: print("āœ… No test collections found") return print(f"Found {len(test_collections)} test collection(s):") total_docs = 0 for coll in test_collections: count = coll.count() total_docs += count print(f" - {coll.name}: {count} documents") if not force: response = input("\nType 'yes' to confirm deletion: ") if response.lower() != 'yes': print("āŒ Deletion cancelled") return print("\nšŸ—‘ļø Deleting test collections...") for coll in test_collections: client.delete_collection(coll.name) print(f" āœ… Deleted: {coll.name}") print(f"\nāœ… Deleted {len(test_collections)} collections, {total_docs} documents") def main(): parser = argparse.ArgumentParser( description="Clear ChromaDB collections (signals + embeddings)", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: python scripts/clear_db_advanced.py # Clear all (interactive) python scripts/clear_db_advanced.py --force # Clear all (no confirmation) python scripts/clear_db_advanced.py --group acme-dev # Clear specific group python scripts/clear_db_advanced.py --meet-only # Clear only meet signals python scripts/clear_db_advanced.py --test-only --force # Clear test data automatically """ ) parser.add_argument("--force", action="store_true", help="Skip confirmation prompt") parser.add_argument("--group", type=str, help="Delete only this specific group") parser.add_argument("--meet-only", action="store_true", help="Delete only meet-related collections") parser.add_argument("--test-only", action="store_true", help="Delete only test collections") args = parser.parse_args() print("=" * 60) print("ChromaDB Advanced Clear Script") print("=" * 60) print(f"Database path: {CHROMA_DB_PATH}") print() # Determine which clear operation to run if args.group: clear_group(args.group, force=args.force) elif args.meet_only: clear_meet_only(force=args.force) elif args.test_only: clear_test_only(force=args.force) else: clear_all(force=args.force) print("\n" + "=" * 60) if __name__ == "__main__": main()