202 lines
6.8 KiB
Python
202 lines
6.8 KiB
Python
"""Migration script: move data from 5 Qdrant collections → 2 collections.

Old structure (5 collections):
- `records`          : MemoryRecord rows (1024-dim vectors)
- `edges`            : Graph edges (1-dim dummy vectors)
- `events`           : Runtime events (1-dim dummy vectors)
- `budget_ledger`    : D-GUARD spend tracking (1-dim dummy vectors)
- `ratelimit_ledger` : D-GUARD rate limit history (1-dim dummy vectors)

New structure (2 collections, per Qdrant best practices):
- `records`  : MemoryRecord rows (1024-dim cosine vectors)
               All points carry `table: "records"` + `group_id` payload.
- `metadata` : Payload-only (no vectors) containing edges, events,
               budget_ledger, ratelimit_ledger.
               Each point carries `table` + `group_id` payload.

Both collections use keyword indexes on `table` and `group_id` for
co-located storage.

Usage:
    python -m iai_mcp.migrate_qdrant

Environment:
    QDRANT_URL      : Qdrant server URL (default: http://192.168.0.22:6333)
    QDRANT_API_KEY  : Qdrant API key
    IAI_MCP_USER_ID : Value written to each point's `group_id` payload
                      (default: "default")
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from uuid import UUID
|
|
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.models import Distance, PointStruct, VectorParams
|
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
|
|
# --------------------------------------------------------------------------- env
|
|
# Connection settings, read once from the environment at import time.
QDRANT_URL = os.getenv("QDRANT_URL", "http://192.168.0.22:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # None when the variable is unset
# Written into the `group_id` payload field of every migrated point.
GROUP_ID = os.getenv("IAI_MCP_USER_ID", "default")
|
|
|
|
|
|
def setup_client() -> QdrantClient:
    """Build a `QdrantClient` from the module-level connection settings.

    Returns:
        A client pointed at `QDRANT_URL`, authenticated with
        `QDRANT_API_KEY`, using a 30-second request timeout.
    """
    connection_kwargs = {
        "url": QDRANT_URL,
        "api_key": QDRANT_API_KEY,
        "timeout": 30,
    }
    return QdrantClient(**connection_kwargs)
|
|
|
|
|
|
def _collection_exists(client: QdrantClient, collection_name: str) -> bool:
    """Return True if `collection_name` exists; only an HTTP 404 means missing."""
    try:
        client.get_collection(collection_name)
    except UnexpectedResponse as exc:
        if exc.status_code == 404:
            return False
        # Auth failures, server errors, etc. must not be mistaken for
        # "collection is missing" and trigger a create call.
        raise
    return True


def ensure_new_collections(client: QdrantClient) -> None:
    """Create the `records` and `metadata` collections and their indexes.

    `records` is created with 1024-dim cosine vectors; `metadata` is created
    with an empty vector configuration (payload-only). Existing collections
    are left untouched. Keyword payload indexes on `table` and `group_id` are
    then requested on both collections.

    Args:
        client: Connected Qdrant client.

    Raises:
        UnexpectedResponse: If the existence check fails with anything other
            than HTTP 404, or if collection creation fails.
    """
    # Collection 1: records (vectors)
    if _collection_exists(client, "records"):
        print(" records collection already exists")
    else:
        print(" creating records collection...")
        client.create_collection(
            collection_name="records",
            vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
        )

    # Collection 2: metadata (payload-only)
    if _collection_exists(client, "metadata"):
        print(" metadata collection already exists")
    else:
        print(" creating metadata collection...")
        # Explicit empty vector map: points in this collection are written
        # with `vector={}` and carry payload only.
        client.create_collection(collection_name="metadata", vectors_config={})

    # Create payload indexes (best-effort: a failure is reported, not fatal).
    for collection_name in ("records", "metadata"):
        for field_name in ("table", "group_id"):
            try:
                client.create_payload_index(
                    collection_name=collection_name,
                    field_name=field_name,
                    field_schema="keyword",
                )
            except Exception as exc:
                print(
                    f" warning: could not create index on "
                    f"{collection_name}.{field_name}: {exc}"
                )
|
|
|
|
|
|
def scroll_all(client: QdrantClient, collection_name: str, batch_size: int = 1000) -> list:
    """Collect every point of a collection by paging through `client.scroll`.

    Args:
        client: Client whose `scroll` method is called.
        collection_name: Collection to read.
        batch_size: Page size passed as `limit` on each scroll call.

    Returns:
        A list with the points of all pages, in the order they were returned.
        Payloads and vectors are requested for every point.
    """
    collected = []
    cursor = None
    exhausted = False
    while not exhausted:
        page, cursor = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=cursor,
            with_payload=True,
            with_vectors=True,
        )
        collected += page
        # A next-page offset of None marks the final page.
        exhausted = cursor is None
    return collected
|
|
|
|
|
|
def migrate_records(client: QdrantClient, batch_size: int = 256) -> int:
    """Tag every point in `records` with `table` and `group_id` payload fields.

    Points are read from the `records` collection and upserted back into it
    with the same id and vector, plus `table="records"` and
    `group_id=GROUP_ID` in the payload. Writes are sent in batches so a large
    collection does not become a single oversized request.

    Args:
        client: Connected Qdrant client.
        batch_size: Maximum number of points per upsert request.

    Returns:
        The number of points written. Points that came back without a vector
        are skipped (left untouched in the collection) and not counted.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    print("\nMigrating records...")
    old_points = scroll_all(client, "records")
    if not old_points:
        print(" no records to migrate")
        return 0

    new_points = []
    skipped = 0
    for pt in old_points:
        if not pt.vector:
            # An empty vector cannot be upserted into a 1024-dim collection;
            # skip the point so it does not fail the whole batch.
            skipped += 1
            continue
        # Copy so the scrolled point's payload is not mutated in place.
        payload = dict(pt.payload or {})
        payload["table"] = "records"
        payload["group_id"] = GROUP_ID
        new_points.append(PointStruct(
            id=pt.id,
            # Pass the vector through unchanged: it may be a plain list or a
            # dict of named vectors, and list() on a dict would keep only keys.
            vector=pt.vector,
            payload=payload,
        ))

    if skipped:
        print(f" warning: skipped {skipped} records without a vector")

    for start in range(0, len(new_points), batch_size):
        client.upsert(
            collection_name="records",
            points=new_points[start:start + batch_size],
        )
    print(f" migrated {len(new_points)} records")
    return len(new_points)
|
|
|
|
|
|
def migrate_metadata(client: QdrantClient, table_name: str, batch_size: int = 1000) -> int:
    """Copy all points of an old collection into the `metadata` collection.

    Each point keeps its id and payload, gains `table=table_name` and
    `group_id=GROUP_ID`, and is written without a vector. A source collection
    that no longer exists (HTTP 404) is treated as "nothing to migrate", so
    the script can be re-run after the old collections were dropped.

    Args:
        client: Connected Qdrant client.
        table_name: Name of the old collection; also stored as `table`.
        batch_size: Maximum number of points per upsert request.

    Returns:
        The number of points written to `metadata`.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
        UnexpectedResponse: If reading the source fails with anything other
            than HTTP 404.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    print(f"\nMigrating {table_name}...")
    try:
        old_points = scroll_all(client, table_name)
    except UnexpectedResponse as exc:
        if exc.status_code != 404:
            raise
        print(f" {table_name} collection not found, nothing to migrate")
        return 0

    if not old_points:
        print(f" no {table_name} points to migrate")
        return 0

    # NOTE(review): point ids are reused as-is. If two source collections
    # share ids (e.g. both use small integers), the later upsert overwrites
    # the earlier point in `metadata` — confirm ids are globally unique.
    new_points = []
    for pt in old_points:
        # Copy so the scrolled point's payload is not mutated in place.
        payload = dict(pt.payload or {})
        payload["table"] = table_name
        payload["group_id"] = GROUP_ID
        new_points.append(PointStruct(
            id=pt.id,
            vector={},  # payload-only (empty dict for no-vector collection)
            payload=payload,
        ))

    for start in range(0, len(new_points), batch_size):
        client.upsert(
            collection_name="metadata",
            points=new_points[start:start + batch_size],
        )
    print(f" migrated {len(new_points)} {table_name} points")
    return len(new_points)
|
|
|
|
|
|
def drop_old_collections(client: QdrantClient) -> None:
    """Delete the four legacy collections, reporting each outcome.

    `edges`, `events`, `budget_ledger` and `ratelimit_ledger` are deleted in
    that order with a 30-second timeout each. A failure is printed as a
    warning and does not stop the remaining deletions. `records` is not in
    the list and is never deleted here.
    """
    for legacy_name in ("edges", "events", "budget_ledger", "ratelimit_ledger"):
        try:
            client.delete_collection(collection_name=legacy_name, timeout=30)
            print(f" dropped {legacy_name} collection")
        except Exception as err:
            print(f" warning: could not drop {legacy_name}: {err}")
|
|
|
|
|
|
def main() -> int:
    """Run the migration end to end and return a process exit code.

    Steps: ensure the two target collections exist, migrate `records` and the
    four metadata tables, drop the old collections, then print a short
    verification summary. Before dropping anything, the exact point count of
    `metadata` is compared with the number of points just migrated into it.

    Returns:
        0 on success; 1 if `metadata` holds fewer points than were migrated,
        in which case the old collections are left in place.
    """
    print("Qdrant migration: 5 collections → 2 collections")
    print(f" QDRANT_URL: {QDRANT_URL}")
    print(f" GROUP_ID: {GROUP_ID}")

    client = setup_client()
    print("\nStep 1: Ensure new collections exist...")
    ensure_new_collections(client)

    print("\nStep 2: Migrate data...")
    t0 = time.time()
    records_migrated = migrate_records(client)
    metadata_migrated = 0
    for table_name in ("edges", "events", "budget_ledger", "ratelimit_ledger"):
        metadata_migrated += migrate_metadata(client, table_name)
    total = records_migrated + metadata_migrated
    print(f"\n total migrated: {total} points in {time.time() - t0:.1f}s")

    print("\nStep 3: Drop old collections...")
    # Safety check: dropping is irreversible, so refuse to delete the sources
    # when the target holds fewer points than were written (e.g. points lost
    # to id collisions between source collections).
    meta_count = client.count(collection_name="metadata", exact=True).count
    if meta_count < metadata_migrated:
        print(
            f" error: metadata holds {meta_count} points but "
            f"{metadata_migrated} were migrated; keeping old collections"
        )
        return 1
    drop_old_collections(client)

    print("\nStep 4: Verify...")
    try:
        rec_count = client.get_collection("records").points_count
        # Reading one point confirms the metadata collection is reachable.
        client.scroll("metadata", limit=1, with_payload=True)
        print(f" records collection: {rec_count} points")
        print(" metadata collection: exists")
    except Exception as e:
        print(f" verification warning: {e}")

    print("\nMigration complete!")
    return 0
|
|
|
|
|
|
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|