"""Migration script: move data from 5 Qdrant collections → 2 collections. Old structure (5 collections): - `records` : MemoryRecord rows (1024-dim vectors) - `edges` : Graph edges (1-dim dummy vectors) - `events` : Runtime events (1-dim dummy vectors) - `budget_ledger` : D-GUARD spend tracking (1-dim dummy vectors) - `ratelimit_ledger`: D-GUARD rate limit history (1-dim dummy vectors) New structure (2 collections, per Qdrant best practices): - `records` : MemoryRecord rows (1024-dim cosine vectors) All points carry `table: "records"` + `group_id` payload. - `metadata` : Payload-only (no vectors) containing edges, events, budget_ledger, ratelimit_ledger. Each point carries `table` + `group_id` payload. Both collections use keyword indexes on `table` for co-located storage. Usage: python -m iai_mcp.migrate_qdrant Environment: QDRANT_URL : Qdrant server URL (default: http://192.168.0.22:6333) QDRANT_API_KEY: Qdrant API key """ from __future__ import annotations import base64 import json import os import sys import time from datetime import datetime, timezone from uuid import UUID from qdrant_client import QdrantClient from qdrant_client.models import Distance, PointStruct, VectorParams from qdrant_client.http.exceptions import UnexpectedResponse # --------------------------------------------------------------------------- env QDRANT_URL = os.environ.get("QDRANT_URL", "http://192.168.0.22:6333") QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") GROUP_ID = os.environ.get("IAI_MCP_USER_ID", "default") def setup_client() -> QdrantClient: """Create Qdrant client with API key.""" return QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30) def ensure_new_collections(client: QdrantClient) -> None: """Create the 2 new collections if they don't exist.""" # Collection 1: records (vectors) try: client.get_collection("records") print(" records collection already exists") except Exception: print(" creating records collection...") client.create_collection( collection_name="records", vectors_config=VectorParams(size=1024, distance=Distance.COSINE), ) # Collection 2: metadata (payload-only) try: client.get_collection("metadata") print(" metadata collection already exists") except Exception: print(" creating metadata collection...") client.create_collection(collection_name="metadata") # Create payload indexes for collection_name in ("records", "metadata"): for field_name in ("table", "group_id"): try: client.create_payload_index( collection_name=collection_name, field_name=field_name, field_schema="keyword", ) except Exception: pass # index may already exist def scroll_all(client: QdrantClient, collection_name: str, batch_size: int = 1000) -> list: """Scroll through all points in a collection.""" offset = None all_points = [] while True: points, next_offset = client.scroll( collection_name=collection_name, limit=batch_size, offset=offset, with_payload=True, with_vectors=True, ) all_points.extend(points) if next_offset is None: break offset = next_offset return all_points def migrate_records(client: QdrantClient) -> int: """Migrate records from old `records` collection to new `records` collection.""" print("\nMigrating records...") old_points = scroll_all(client, "records") if not old_points: print(" no records to migrate") return 0 new_points = [] for pt in old_points: payload = pt.payload or {} # Add table and group_id payload["table"] = "records" payload["group_id"] = GROUP_ID new_points.append(PointStruct( id=pt.id, vector=list(pt.vector) if pt.vector else [], payload=payload, )) client.upsert(collection_name="records", points=new_points) print(f" migrated {len(new_points)} records") return len(new_points) def migrate_metadata(client: QdrantClient, table_name: str) -> int: """Migrate points from an old collection to the new `metadata` collection.""" print(f"\nMigrating {table_name}...") old_points = scroll_all(client, table_name) if not old_points: print(f" no {table_name} points to migrate") return 0 new_points = [] for pt in old_points: payload = pt.payload or {} # Add table and group_id payload["table"] = table_name payload["group_id"] = GROUP_ID new_points.append(PointStruct( id=pt.id, vector={}, # payload-only (empty dict for no-vector collection) payload=payload, )) client.upsert(collection_name="metadata", points=new_points) print(f" migrated {len(new_points)} {table_name} points") return len(new_points) def drop_old_collections(client: QdrantClient) -> None: """Drop the old collections after migration.""" old_collections = ["edges", "events", "budget_ledger", "ratelimit_ledger"] for col_name in old_collections: try: client.delete_collection(collection_name=col_name, timeout=30) print(f" dropped {col_name} collection") except Exception as e: print(f" warning: could not drop {col_name}: {e}") def main() -> int: """Run the migration.""" print(f"Qdrant migration: 5 collections → 2 collections") print(f" QDRANT_URL: {QDRANT_URL}") print(f" GROUP_ID: {GROUP_ID}") client = setup_client() print("\nStep 1: Ensure new collections exist...") ensure_new_collections(client) print("\nStep 2: Migrate data...") t0 = time.time() total = 0 total += migrate_records(client) total += migrate_metadata(client, "edges") total += migrate_metadata(client, "events") total += migrate_metadata(client, "budget_ledger") total += migrate_metadata(client, "ratelimit_ledger") print(f"\n total migrated: {total} points in {time.time() - t0:.1f}s") print("\nStep 3: Drop old collections...") drop_old_collections(client) print("\nStep 4: Verify...") try: rec_count = client.get_collection("records").points_count meta_points = client.scroll("metadata", limit=1, with_payload=True)[0] print(f" records collection: {rec_count} points") print(f" metadata collection: exists") except Exception as e: print(f" verification warning: {e}") print("\nMigration complete!") return 0 if __name__ == "__main__": sys.exit(main())