mirror of
https://github.com/arkorty/B.Tech-Project-III.git
synced 2026-04-19 12:41:48 +00:00
195 lines
6.6 KiB
Python
195 lines
6.6 KiB
Python
"""
Advanced ChromaDB clear script with selective deletion options.

Usage:
    python scripts/clear_db_advanced.py                        # Interactive - clear all
    python scripts/clear_db_advanced.py --force                # Clear all without confirmation
    python scripts/clear_db_advanced.py --group meet-sessions  # Clear specific group only
    python scripts/clear_db_advanced.py --meet-only            # Clear only meet-related signals
    python scripts/clear_db_advanced.py --test-only            # Clear only test collections
"""
|
|
import sys
|
|
import os
|
|
import argparse
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
|
|
import chromadb
|
|
from backend.config import CHROMA_DB_PATH
|
|
from backend.db.chroma import get_group_ids
|
|
|
|
|
|
def clear_all(force=False):
    """Remove every collection from the ChromaDB store at CHROMA_DB_PATH.

    Prints each collection with its document count, then (unless ``force``
    is truthy) waits for the user to type 'yes' before deleting the
    collections one by one.
    """
    db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    found = db.list_collections()

    # Nothing stored: report and stop before prompting.
    if not found:
        print("✅ Database is already empty")
        return

    print(f"Found {len(found)} collection(s) to delete:")
    grand_total = 0
    for entry in found:
        n_docs = entry.count()
        grand_total += n_docs
        print(f" - {entry.name}: {n_docs} documents")

    print(f"\nTotal: {grand_total} documents across {len(found)} collections")

    # `force` short-circuits, so the prompt is only shown in interactive mode.
    confirmed = force or input("\nType 'yes' to confirm deletion: ").lower() == "yes"
    if not confirmed:
        print("❌ Deletion cancelled")
        return

    print("\n🗑️ Deleting...")
    for entry in found:
        db.delete_collection(entry.name)
        print(f" ✅ Deleted: {entry.name}")

    print(f"\n✅ Deleted {len(found)} collections, {grand_total} documents")
|
|
|
|
|
|
def clear_group(group_id: str, force=False):
    """Delete the single collection that belongs to ``group_id``.

    The collection name is derived from the group id: prefix ``ll_``,
    hyphens replaced by underscores, truncated to 63 characters. Any error
    raised while looking up, confirming or deleting the collection is
    reported on stdout instead of propagating.
    """
    db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collection_name = ("ll_" + group_id.replace("-", "_"))[:63]

    try:
        doc_count = db.get_collection(collection_name).count()

        print(f"Found collection '{collection_name}' with {doc_count} documents")

        # `force` short-circuits, so the prompt only appears interactively.
        confirmed = force or input(
            f"\nDelete collection '{collection_name}'? Type 'yes' to confirm: "
        ).lower() == "yes"
        if not confirmed:
            print("❌ Deletion cancelled")
            return

        db.delete_collection(collection_name)
        print(f"✅ Deleted collection '{collection_name}' ({doc_count} documents)")

    except Exception as err:
        print(f"❌ Collection '{collection_name}' not found or error: {err}")
|
|
|
|
|
|
def clear_meet_only(force=False):
    """Delete only the collections that look meet-related.

    A collection is treated as meet-related when at least one of the first
    five documents returned by ``get(limit=5)`` has the metadata entry
    ``lens == "meet"``. Documents without metadata are ignored rather than
    aborting the check, and a collection whose sample cannot be read is
    skipped with a warning instead of being dropped silently.

    Args:
        force: When true, skip the interactive 'yes' confirmation.
    """
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collections = client.list_collections()

    # Identify meet collections (have lens=meet signals)
    meet_collections = []
    for coll in collections:
        try:
            # Sample first few docs to check if they're meet-related
            results = coll.get(limit=5)
        except Exception as e:
            # Best effort: an unreadable collection is never deleted, but the
            # operator is told why it was left out.
            name = getattr(coll, "name", coll)
            print(f"⚠️ Skipping '{name}': could not sample documents ({e})")
            continue

        metadatas = (results or {}).get("metadatas") or []
        # A metadata entry may be None for documents stored without
        # metadata; guard so one such entry does not hide later meet signals.
        if any(meta and meta.get("lens") == "meet" for meta in metadatas):
            meet_collections.append(coll)

    if not meet_collections:
        print("✅ No meet-related collections found")
        return

    print(f"Found {len(meet_collections)} meet-related collection(s):")
    total_docs = 0
    for coll in meet_collections:
        count = coll.count()
        total_docs += count
        print(f" - {coll.name}: {count} documents")

    if not force:
        response = input("\nType 'yes' to confirm deletion: ")
        if response.lower() != 'yes':
            print("❌ Deletion cancelled")
            return

    print("\n🗑️ Deleting meet collections...")
    for coll in meet_collections:
        client.delete_collection(coll.name)
        print(f" ✅ Deleted: {coll.name}")

    print(f"\n✅ Deleted {len(meet_collections)} collections, {total_docs} documents")
|
|
|
|
|
|
def clear_test_only(force=False):
    """Delete only test collections (names starting with 'test' or 'll_test').

    Matching is case-insensitive and anchored at the start of the name, so
    collections that merely contain the letters "test" somewhere (for
    example 'll_latest_news' or 'll_contest') are never selected.

    Args:
        force: When true, skip the interactive 'yes' confirmation.
    """
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collections = client.list_collections()

    # Prefix match, not substring: this operation is destructive, so only
    # names that actually begin with a test prefix may be picked up.
    test_prefixes = ("test", "ll_test")
    test_collections = [
        c for c in collections if c.name.lower().startswith(test_prefixes)
    ]

    if not test_collections:
        print("✅ No test collections found")
        return

    print(f"Found {len(test_collections)} test collection(s):")
    total_docs = 0
    for coll in test_collections:
        count = coll.count()
        total_docs += count
        print(f" - {coll.name}: {count} documents")

    if not force:
        response = input("\nType 'yes' to confirm deletion: ")
        if response.lower() != 'yes':
            print("❌ Deletion cancelled")
            return

    print("\n🗑️ Deleting test collections...")
    for coll in test_collections:
        client.delete_collection(coll.name)
        print(f" ✅ Deleted: {coll.name}")

    print(f"\n✅ Deleted {len(test_collections)} collections, {total_docs} documents")
|
|
|
|
|
|
def main():
    """Parse command-line options and run the selected clear operation.

    Exactly one selector (``--group``, ``--meet-only`` or ``--test-only``)
    may be given; with none, every collection is cleared. Conflicting
    selectors and a blank ``--group`` value are rejected with a usage error
    (exit status 2) before anything is touched, so a mistyped or empty
    argument can never fall through to the clear-everything path.
    """
    parser = argparse.ArgumentParser(
        description="Clear ChromaDB collections (signals + embeddings)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python scripts/clear_db_advanced.py # Clear all (interactive)
python scripts/clear_db_advanced.py --force # Clear all (no confirmation)
python scripts/clear_db_advanced.py --group acme-dev # Clear specific group
python scripts/clear_db_advanced.py --meet-only # Clear only meet signals
python scripts/clear_db_advanced.py --test-only --force # Clear test data automatically
"""
    )

    parser.add_argument("--force", action="store_true", help="Skip confirmation prompt")

    # Only one selector is ever honoured, so let argparse reject combinations
    # instead of silently ignoring all but the first.
    selectors = parser.add_mutually_exclusive_group()
    selectors.add_argument("--group", type=str, help="Delete only this specific group")
    selectors.add_argument("--meet-only", action="store_true", help="Delete only meet-related collections")
    selectors.add_argument("--test-only", action="store_true", help="Delete only test collections")

    args = parser.parse_args()

    # An empty group id (e.g. an unset shell variable) is falsy; without this
    # check it would be treated as "no selector" and clear the whole database.
    if args.group is not None and not args.group.strip():
        parser.error("--group requires a non-empty group id")

    print("=" * 60)
    print("ChromaDB Advanced Clear Script")
    print("=" * 60)
    print(f"Database path: {CHROMA_DB_PATH}")
    print()

    # Determine which clear operation to run
    if args.group is not None:
        clear_group(args.group, force=args.force)
    elif args.meet_only:
        clear_meet_only(force=args.force)
    elif args.test_only:
        clear_test_only(force=args.force)
    else:
        clear_all(force=args.force)

    print("\n" + "=" * 60)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|