refactor: opencode
This commit is contained in:
parent
09c5b30f15
commit
91d67b2e12
12 changed files with 1843 additions and 77 deletions
202
src/iai_mcp/migrate_qdrant.py
Normal file
202
src/iai_mcp/migrate_qdrant.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
"""Migration script: move data from 5 Qdrant collections → 2 collections.
|
||||
|
||||
Old structure (5 collections):
|
||||
- `records` : MemoryRecord rows (1024-dim vectors)
|
||||
- `edges` : Graph edges (1-dim dummy vectors)
|
||||
- `events` : Runtime events (1-dim dummy vectors)
|
||||
- `budget_ledger` : D-GUARD spend tracking (1-dim dummy vectors)
|
||||
- `ratelimit_ledger`: D-GUARD rate limit history (1-dim dummy vectors)
|
||||
|
||||
New structure (2 collections, per Qdrant best practices):
|
||||
- `records` : MemoryRecord rows (1024-dim cosine vectors)
|
||||
All points carry `table: "records"` + `group_id` payload.
|
||||
- `metadata` : Payload-only (no vectors) containing edges, events,
|
||||
budget_ledger, ratelimit_ledger.
|
||||
Each point carries `table` + `group_id` payload.
|
||||
|
||||
Both collections use keyword indexes on `table` and `group_id` for co-located storage.
|
||||
|
||||
Usage:
|
||||
python -m iai_mcp.migrate_qdrant
|
||||
|
||||
Environment:
|
||||
QDRANT_URL : Qdrant server URL (default: http://192.168.0.22:6333)
|
||||
QDRANT_API_KEY: Qdrant API key
IAI_MCP_USER_ID: value written as `group_id` on every migrated point (default: "default")
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Distance, PointStruct, VectorParams
|
||||
from qdrant_client.http.exceptions import UnexpectedResponse
|
||||
|
||||
# --------------------------------------------------------------------------- env
# Settings are read from the process environment once, at import time.
# Server endpoint; falls back to the hard-coded address below when unset.
QDRANT_URL = os.getenv("QDRANT_URL", "http://192.168.0.22:6333")
# API key handed to the client; None when the variable is unset.
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
# Value stored in the `group_id` payload field of every migrated point.
GROUP_ID = os.getenv("IAI_MCP_USER_ID", "default")
|
||||
|
||||
|
||||
def setup_client() -> QdrantClient:
    """Build the Qdrant client used by every migration step.

    Returns:
        A `QdrantClient` pointed at `QDRANT_URL`, authenticated with
        `QDRANT_API_KEY`, using a timeout of 30.
    """
    connection_options = {
        "url": QDRANT_URL,
        "api_key": QDRANT_API_KEY,
        "timeout": 30,
    }
    return QdrantClient(**connection_options)
|
||||
|
||||
|
||||
def ensure_new_collections(client: QdrantClient) -> None:
    """Create the two target collections and their payload indexes if missing.

    `records` is created with 1024-dimensional cosine vectors; `metadata` is
    created without any vector configuration.  Afterwards a keyword payload
    index on `table` and on `group_id` is requested for both collections.

    Index creation is best-effort: a failure is reported as a warning on
    stdout instead of being silently discarded, and never aborts the run.

    Args:
        client: Connected Qdrant client.
    """

    def _exists(name: str) -> bool:
        # Any failure of the lookup is treated as "collection is missing".
        # If the real cause is something else (e.g. connectivity), the
        # create_collection call that follows fails loudly with that error.
        try:
            client.get_collection(name)
        except Exception:
            return False
        return True

    # Collection 1: records (vectors)
    if _exists("records"):
        print(" records collection already exists")
    else:
        print(" creating records collection...")
        client.create_collection(
            collection_name="records",
            vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
        )

    # Collection 2: metadata (payload-only)
    if _exists("metadata"):
        print(" metadata collection already exists")
    else:
        print(" creating metadata collection...")
        client.create_collection(collection_name="metadata")

    # Create payload indexes
    for collection_name in ("records", "metadata"):
        for field_name in ("table", "group_id"):
            try:
                client.create_payload_index(
                    collection_name=collection_name,
                    field_name=field_name,
                    field_schema="keyword",
                )
            except Exception as exc:
                # The index may simply exist already, so keep going -- but
                # surface the reason so real failures are not hidden.
                print(
                    f" warning: could not create {field_name} index "
                    f"on {collection_name}: {exc}"
                )
|
||||
|
||||
|
||||
def scroll_all(client: QdrantClient, collection_name: str, batch_size: int = 1000) -> list:
    """Collect every point of a collection by paging through `client.scroll`.

    Args:
        client: Connected Qdrant client.
        collection_name: Collection to read.
        batch_size: Page size passed as `limit` to each scroll request.

    Returns:
        A single list holding the points of all pages, in the order the
        pages were returned.  Payloads and vectors are both requested.
    """
    collected = []
    cursor = None
    exhausted = False
    while not exhausted:
        page, cursor = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=cursor,
            with_payload=True,
            with_vectors=True,
        )
        collected += page
        # A missing next-page offset marks the final page.
        exhausted = cursor is None
    return collected
|
||||
|
||||
|
||||
def migrate_records(client: QdrantClient, batch_size: int = 256) -> int:
    """Re-tag every point of the `records` collection in place.

    Each point is read back with its vector and payload, gets
    `table="records"` and `group_id=GROUP_ID` added to its payload, and is
    upserted into `records` again under the same id.

    Points are written in chunks of `batch_size` rather than in one request,
    so a large collection does not produce a single oversized upsert.

    Args:
        client: Connected Qdrant client.
        batch_size: Maximum number of points per upsert request.

    Returns:
        Number of points written (0 when the collection is empty).

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    print("\nMigrating records...")
    old_points = scroll_all(client, "records")
    if not old_points:
        print(" no records to migrate")
        return 0

    new_points = []
    for pt in old_points:
        # Copy so the scrolled point objects are left untouched.
        payload = dict(pt.payload or {})
        # Add table and group_id
        payload["table"] = "records"
        payload["group_id"] = GROUP_ID

        vector = pt.vector
        if isinstance(vector, dict):
            # Mapping-shaped vectors are passed through unchanged; list()
            # on a dict would keep only its keys and lose the values.
            pass
        elif vector:
            vector = list(vector)
        else:
            # NOTE(review): a point without a vector is sent with an empty
            # one, as before; the 1024-dim collection will presumably
            # reject it -- confirm whether such points can exist.
            vector = []

        new_points.append(PointStruct(id=pt.id, vector=vector, payload=payload))

    for start in range(0, len(new_points), batch_size):
        client.upsert(
            collection_name="records",
            points=new_points[start:start + batch_size],
        )

    print(f" migrated {len(new_points)} records")
    return len(new_points)
|
||||
|
||||
|
||||
def migrate_metadata(client: QdrantClient, table_name: str, batch_size: int = 500) -> int:
    """Copy all points of one old collection into the `metadata` collection.

    Every point keeps its id and payload, gains `table=table_name` and
    `group_id=GROUP_ID` in the payload, and is written without a vector.

    Points are written in chunks of `batch_size` rather than in one request,
    so a large collection does not produce a single oversized upsert.

    NOTE(review): ids are reused verbatim.  If different source collections
    share ids, a later table would overwrite an earlier one inside
    `metadata` -- confirm that ids are unique across the old collections.

    Args:
        client: Connected Qdrant client.
        table_name: Name of the old collection to read; also stored as the
            `table` payload value.
        batch_size: Maximum number of points per upsert request.

    Returns:
        Number of points written (0 when the source collection is empty).

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    print(f"\nMigrating {table_name}...")
    old_points = scroll_all(client, table_name)
    if not old_points:
        print(f" no {table_name} points to migrate")
        return 0

    new_points = []
    for pt in old_points:
        # Copy so the scrolled point objects are left untouched.
        payload = dict(pt.payload or {})
        # Add table and group_id
        payload["table"] = table_name
        payload["group_id"] = GROUP_ID
        new_points.append(PointStruct(
            id=pt.id,
            vector={},  # payload-only (empty dict for no-vector collection)
            payload=payload,
        ))

    for start in range(0, len(new_points), batch_size):
        client.upsert(
            collection_name="metadata",
            points=new_points[start:start + batch_size],
        )

    print(f" migrated {len(new_points)} {table_name} points")
    return len(new_points)
|
||||
|
||||
|
||||
def drop_old_collections(client: QdrantClient) -> None:
    """Delete the four legacy collections, continuing past failures.

    `records` is not in the list because it is reused as a target
    collection.  A failed delete is reported as a warning on stdout and the
    remaining collections are still attempted.

    Args:
        client: Connected Qdrant client.
    """
    for legacy in ("edges", "events", "budget_ledger", "ratelimit_ledger"):
        try:
            client.delete_collection(collection_name=legacy, timeout=30)
        except Exception as err:
            print(f" warning: could not drop {legacy}: {err}")
        else:
            print(f" dropped {legacy} collection")
|
||||
|
||||
|
||||
def main() -> int:
    """Run the migration and return a process exit code.

    Steps, in order:
      1. make sure the `records` and `metadata` collections exist;
      2. tag the records and copy the four legacy tables into `metadata`;
      3. verify, using exact counts, that each target collection holds at
         least as many points as were just written to it;
      4. only after a successful verification, drop the legacy collections.

    Verification runs before the drop so that a short count (for example
    when ids from different legacy tables overwrite each other) leaves the
    original data in place.

    Returns:
        0 when the migration was verified and the drop step ran, 1 when
        verification failed and nothing was dropped.
    """
    print("Qdrant migration: 5 collections → 2 collections")
    print(f" QDRANT_URL: {QDRANT_URL}")
    print(f" GROUP_ID: {GROUP_ID}")

    client = setup_client()
    print("\nStep 1: Ensure new collections exist...")
    ensure_new_collections(client)

    print("\nStep 2: Migrate data...")
    t0 = time.time()
    record_total = migrate_records(client)
    metadata_total = 0
    for table_name in ("edges", "events", "budget_ledger", "ratelimit_ledger"):
        metadata_total += migrate_metadata(client, table_name)
    total = record_total + metadata_total
    print(f"\n total migrated: {total} points in {time.time() - t0:.1f}s")

    print("\nStep 3: Verify...")
    try:
        # Exact counts: the collection-info point count is only an estimate.
        rec_count = client.count(collection_name="records", exact=True).count
        meta_count = client.count(collection_name="metadata", exact=True).count
    except Exception as e:
        print(f" verification failed: {e}")
        print(" old collections were NOT dropped")
        return 1

    print(f" records collection: {rec_count} points")
    print(f" metadata collection: {meta_count} points")
    if rec_count < record_total or meta_count < metadata_total:
        print(
            " verification failed: target collections hold fewer points "
            f"than were migrated (records {rec_count}/{record_total}, "
            f"metadata {meta_count}/{metadata_total})"
        )
        print(" old collections were NOT dropped")
        return 1

    print("\nStep 4: Drop old collections...")
    drop_old_collections(client)

    print("\nMigration complete!")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue