iai-mcp-opencode/src/iai_mcp/migrate_qdrant.py
2026-05-12 16:45:15 +02:00

202 lines
6.8 KiB
Python

"""Migration script: move data from 5 Qdrant collections → 2 collections.
Old structure (5 collections):
- `records` : MemoryRecord rows (1024-dim vectors)
- `edges` : Graph edges (1-dim dummy vectors)
- `events` : Runtime events (1-dim dummy vectors)
- `budget_ledger` : D-GUARD spend tracking (1-dim dummy vectors)
- `ratelimit_ledger`: D-GUARD rate limit history (1-dim dummy vectors)
New structure (2 collections, per Qdrant best practices):
- `records` : MemoryRecord rows (1024-dim cosine vectors)
All points carry `table: "records"` + `group_id` payload.
- `metadata` : Payload-only (no vectors) containing edges, events,
budget_ledger, ratelimit_ledger.
Each point carries `table` + `group_id` payload.
Both collections use keyword indexes on `table` and `group_id` for co-located storage.
Usage:
python -m iai_mcp.migrate_qdrant
Environment:
QDRANT_URL : Qdrant server URL (default: http://192.168.0.22:6333)
QDRANT_API_KEY: Qdrant API key
IAI_MCP_USER_ID: value written to the `group_id` payload of every point (default: default)
"""
from __future__ import annotations
import base64
import json
import os
import sys
import time
from datetime import datetime, timezone
from uuid import UUID
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from qdrant_client.http.exceptions import UnexpectedResponse
# --------------------------------------------------------------------------- env
# Settings are read once at import time; each can be overridden via the
# environment variable named in the call.
QDRANT_URL = os.getenv("QDRANT_URL", "http://192.168.0.22:6333")  # server endpoint
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # None when the variable is unset
GROUP_ID = os.getenv("IAI_MCP_USER_ID", "default")  # stored as `group_id` payload
def setup_client() -> QdrantClient:
    """Build the Qdrant client used by every migration step.

    Returns:
        A client pointed at `QDRANT_URL`, authenticated with `QDRANT_API_KEY`
        and using a timeout of 30.
    """
    connection = {
        "url": QDRANT_URL,
        "api_key": QDRANT_API_KEY,
        "timeout": 30,
    }
    return QdrantClient(**connection)
def ensure_new_collections(client: QdrantClient) -> None:
    """Create the `records` and `metadata` collections and their payload indexes.

    Existence is checked explicitly with `collection_exists`, so a connection
    or authentication failure propagates to the caller instead of being
    mistaken for "collection is missing".

    Args:
        client: Connected Qdrant client.
    """
    # Collection 1: records (1024-dim cosine vectors)
    if client.collection_exists("records"):
        print(" records collection already exists")
    else:
        print(" creating records collection...")
        client.create_collection(
            collection_name="records",
            vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
        )

    # Collection 2: metadata (payload-only, created without a vector config)
    if client.collection_exists("metadata"):
        print(" metadata collection already exists")
    else:
        print(" creating metadata collection...")
        client.create_collection(collection_name="metadata")

    # Keyword payload indexes on the two keys every migrated point carries.
    for collection_name in ("records", "metadata"):
        for field_name in ("table", "group_id"):
            try:
                client.create_payload_index(
                    collection_name=collection_name,
                    field_name=field_name,
                    field_schema="keyword",
                )
            except Exception as e:
                # Best effort (the index may already exist), but report the
                # failure instead of hiding it.
                print(
                    f" warning: could not create {field_name} index "
                    f"on {collection_name}: {e}"
                )
def scroll_all(client: QdrantClient, collection_name: str, batch_size: int = 1000) -> list:
    """Collect every point of a collection by paging through `client.scroll`.

    Args:
        client: Qdrant client to read from.
        collection_name: Collection to read.
        batch_size: Page size passed to each scroll request as `limit`.

    Returns:
        All points, in the order the pages were returned, with payloads and
        vectors requested. The whole result is held in memory.
    """
    collected = []
    cursor = None  # None asks for the first page
    has_more = True
    while has_more:
        page, cursor = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=cursor,
            with_payload=True,
            with_vectors=True,
        )
        collected += page
        # A None offset in the reply ends the loop.
        has_more = cursor is not None
    return collected
def migrate_records(client: QdrantClient) -> int:
    """Tag every point in the `records` collection with `table` and `group_id`.

    Source and destination are the same collection name, so each point is
    written back under its existing id and vector, with its payload extended
    by `table="records"` and `group_id=GROUP_ID`.

    Args:
        client: Connected Qdrant client.

    Returns:
        Number of points that were tagged.
    """
    print("\nMigrating records...")
    old_points = scroll_all(client, "records")
    if not old_points:
        print(" no records to migrate")
        return 0

    tags = {"table": "records", "group_id": GROUP_ID}
    # Send bounded batches so one request never carries the whole collection.
    batch_size = 500

    to_upsert = []
    vectorless_ids = []
    for pt in old_points:
        if not pt.vector:
            # Never fabricate an empty vector: it cannot match the collection's
            # dimension and would fail the request. Tag the payload only.
            vectorless_ids.append(pt.id)
            continue
        # Named vectors come back as a dict and must be passed through as-is;
        # list() on a dict would keep only its keys.
        vector = pt.vector if isinstance(pt.vector, dict) else list(pt.vector)
        to_upsert.append(PointStruct(
            id=pt.id,
            vector=vector,
            payload={**(pt.payload or {}), **tags},  # copy, don't mutate pt
        ))

    for start in range(0, len(to_upsert), batch_size):
        client.upsert(
            collection_name="records",
            points=to_upsert[start:start + batch_size],
        )
    for start in range(0, len(vectorless_ids), batch_size):
        client.set_payload(
            collection_name="records",
            payload=tags,
            points=vectorless_ids[start:start + batch_size],
        )

    migrated = len(to_upsert) + len(vectorless_ids)
    print(f" migrated {migrated} records")
    return migrated
def migrate_metadata(client: QdrantClient, table_name: str) -> int:
    """Copy every point of an old collection into the `metadata` collection.

    Each point keeps its id and payload, gains `table=table_name` and
    `group_id=GROUP_ID`, and is written without a vector.

    Args:
        client: Connected Qdrant client.
        table_name: Name of the old collection; also stored as the `table`
            payload value.

    Returns:
        Number of points written, or 0 when the old collection is empty or no
        longer exists.

    Raises:
        UnexpectedResponse: For any server error other than a 404 on the old
            collection.
    """
    print(f"\nMigrating {table_name}...")
    try:
        old_points = scroll_all(client, table_name)
    except UnexpectedResponse as exc:
        # A 404 means the old collection is already gone (for example dropped
        # by an earlier run); skip it so the script can be re-run.
        if exc.status_code != 404:
            raise
        print(f" {table_name} collection not found, nothing to migrate")
        return 0
    if not old_points:
        print(f" no {table_name} points to migrate")
        return 0

    # NOTE(review): ids are reused unchanged. If two old collections contain
    # the same point id, the later upsert presumably overwrites the earlier
    # point in `metadata` -- confirm ids are unique across the merged tables.
    new_points = [
        PointStruct(
            id=pt.id,
            vector={},  # payload-only (empty dict for no-vector collection)
            payload={
                **(pt.payload or {}),  # copy, don't mutate pt
                "table": table_name,
                "group_id": GROUP_ID,
            },
        )
        for pt in old_points
    ]

    # Send bounded batches so one request never carries the whole collection.
    batch_size = 500
    for start in range(0, len(new_points), batch_size):
        client.upsert(
            collection_name="metadata",
            points=new_points[start:start + batch_size],
        )
    print(f" migrated {len(new_points)} {table_name} points")
    return len(new_points)
def drop_old_collections(client: QdrantClient) -> None:
    """Delete the four old collections whose data now lives in `metadata`.

    Deletion is best effort: a failure for one collection is printed as a
    warning and the remaining collections are still attempted.

    Args:
        client: Qdrant client to issue the deletes on.
    """
    obsolete = ("edges", "events", "budget_ledger", "ratelimit_ledger")
    for name in obsolete:
        try:
            client.delete_collection(collection_name=name, timeout=30)
        except Exception as err:
            print(f" warning: could not drop {name}: {err}")
        else:
            print(f" dropped {name} collection")
def main() -> int:
    """Run the 5 -> 2 collection migration and return a process exit code.

    Steps: ensure the target collections exist, migrate the data, verify that
    the targets hold at least as many points as were migrated, and only then
    drop the old collections. Verification runs before the drop so a failed
    or lossy migration never destroys its source data.

    Returns:
        0 when the migration was verified and the old collections were
        dropped; 1 when verification failed, in which case nothing is dropped.
    """
    # Local import: only the verification step needs the filter models.
    from qdrant_client.models import FieldCondition, Filter, MatchValue

    metadata_tables = ("edges", "events", "budget_ledger", "ratelimit_ledger")

    print("Qdrant migration: 5 collections → 2 collections")
    print(f" QDRANT_URL: {QDRANT_URL}")
    print(f" GROUP_ID: {GROUP_ID}")
    client = setup_client()

    print("\nStep 1: Ensure new collections exist...")
    ensure_new_collections(client)

    print("\nStep 2: Migrate data...")
    t0 = time.time()
    records_migrated = migrate_records(client)
    migrated = {}
    for name in metadata_tables:
        migrated[name] = migrate_metadata(client, name)
    total = records_migrated + sum(migrated.values())
    print(f"\n total migrated: {total} points in {time.time() - t0:.1f}s")

    print("\nStep 3: Verify...")
    verified = True
    try:
        rec_count = client.get_collection("records").points_count
        print(f" records collection: {rec_count} points")
        # points_count may be None; treat that as zero.
        if (rec_count or 0) < records_migrated:
            verified = False
        for name, expected in migrated.items():
            # Count per source table: a shortfall means points were lost,
            # for example overwritten by another table using the same ids.
            found = client.count(
                collection_name="metadata",
                count_filter=Filter(
                    must=[FieldCondition(key="table", match=MatchValue(value=name))]
                ),
                exact=True,
            ).count
            print(f" metadata[{name}]: {found} present, {expected} migrated")
            if found < expected:
                verified = False
    except Exception as e:
        # Top-level boundary: report the error and refuse to drop anything.
        print(f" verification error: {e}")
        verified = False

    if not verified:
        print("\nVerification failed; old collections were NOT dropped.")
        return 1

    print("\nStep 4: Drop old collections...")
    drop_old_collections(client)
    print("\nMigration complete!")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())