refactor: opencode

This commit is contained in:
Apunkt 2026-05-12 16:45:15 +02:00
parent 09c5b30f15
commit 91d67b2e12
No known key found for this signature in database
12 changed files with 1843 additions and 77 deletions

View file

@ -222,7 +222,7 @@ def dispatch(store: MemoryStore, method: str, params: dict) -> dict:
# Plan 02 dispatch: non-empty store -> 5-stage pipeline;
# empty store -> baseline cosine recall (Plan 01 fallback).
records_count = store.db.open_table("records").count_rows()
records_count = store.count_rows("records")
if records_count == 0:
cue_embedding = params.get("cue_embedding") or [0.0] * EMBED_DIM
resp = retrieve.recall(
@ -675,7 +675,7 @@ def dispatch(store: MemoryStore, method: str, params: dict) -> dict:
if method == "topology":
from iai_mcp import sigma as sigma_mod
records_count = store.db.open_table("records").count_rows()
records_count = store.count_rows("records")
if records_count == 0:
return {
"N": 0, "C": 0.0, "L": 0.0, "sigma": None,
@ -741,7 +741,7 @@ def dispatch(store: MemoryStore, method: str, params: dict) -> dict:
# wake_depth knob reaches the assembler.
from iai_mcp.session import assemble_session_start, SessionStartPayload
sid = params.get("session_id", "-")
records_count = store.db.open_table("records").count_rows()
records_count = store.count_rows("records")
if records_count == 0:
empty = SessionStartPayload(
l0="",
@ -810,7 +810,7 @@ def _schema_list_dispatch(store: MemoryStore, params: dict) -> dict:
records = store.all_records()
schema_records = [r for r in records if "schema" in (r.tags or [])]
edges_df = store.db.open_table("edges").to_pandas()
edges_df = store.edges_as_dataframe()
if not edges_df.empty:
schema_edges = edges_df[edges_df["edge_type"] == "schema_instance_of"]
else:

View file

@ -51,7 +51,7 @@ from iai_mcp.quiet_window import (
should_relearn,
)
from iai_mcp.socket_server import SocketServer
from iai_mcp.store import MemoryStore
from iai_mcp.store import MemoryStore, get_store, _use_qdrant
from iai_mcp.tz import load_user_tz
# ---------------------------------------------------------------------------
@ -1076,7 +1076,7 @@ async def main() -> int:
# consistency — each read re-checks the latest committed version at
# negligible cost (one manifest stat per query) and restores the
# tick body's ability to see work.
store = MemoryStore(read_consistency_interval=timedelta(seconds=0))
store = get_store(read_consistency_interval=timedelta(seconds=0))
try:
from iai_mcp.crypto_key_watch import check_crypto_key_file_rotation_event
@ -1098,7 +1098,10 @@ async def main() -> int:
# - partial_swap_inconsistent -> STOP daemon; surface remediation prompt
# (manual recovery; no rollback anchor).
from iai_mcp.migrate import detect_partial_migration
_migration_state = detect_partial_migration(store.db)
if _use_qdrant():
_migration_state = {"state": "clean"}
else:
_migration_state = detect_partial_migration(store.db)
if _migration_state["state"] == "partial_swap_inconsistent":
try:
sys.stderr.write(

View file

@ -1,4 +1,4 @@
"""Embedding layer -- configurable embedder with a 3-model registry.
"""Embedding layer -- configurable embedder with a 4-model registry + remote.
Plan 05-08 (2026-04-20): the DEFAULT is now ``bge-small-en-v1.5`` (384d
English-only), reverting the Phase-2 deviation. PROJECT.md line
@ -8,11 +8,12 @@ swapped in bge-m3 (1024d multilingual) as D-08a. User directive
job. bge-m3 stays selectable via env var / kwarg for anyone who needs
multilingual semantic match at the 5x RAM cost.
Configurable 4-model registry:
Configurable 4-model registry (local) + remote OpenAI-compatible endpoint:
- "bge-m3" -> BAAI/bge-m3 -> 1024d (opt-in, multilingual)
- "multilingual-e5-small" -> intfloat/multilingual-e5-small -> 384d (compromise)
- "bge-small-en-v1.5" -> BAAI/bge-small-en-v1.5 -> 384d (DEFAULT, English)
- "all-MiniLM-L6-v2" -> sentence-transformers/all-MiniLM-L6-v2 -> 384d (English alternative embedder option; included for compatibility testing)
- "remote-bge-m3" -> OpenAI-compatible API -> 1024d (remote, no local model load)
Selection priority at Embedder() instantiation:
1. Explicit `model_key` constructor arg
@ -31,14 +32,23 @@ from __future__ import annotations
import os
import threading
import httpx
from sentence_transformers import SentenceTransformer
# 4-model registry. Name convention: short logical key -> HF repo id + dim.
# 4-model registry + remote entry. Name convention: short logical key -> HF
# repo id / endpoint + dim.
# (2026-04-29): all-MiniLM-L6-v2 added as additive ablation entry;
# DEFAULT_MODEL_KEY unchanged (English-Only Brain lock from / Plan 05-08).
# (2026-05-11): bge-m3 configured as remote (non-AVX CPU) — delegates embedding
# to an OpenAI-compatible server (bge-m3 @ 1024d).
MODEL_REGISTRY: dict[str, dict] = {
"bge-m3": {"hf": "BAAI/bge-m3", "dim": 1024},
"bge-m3": {
"endpoint": "http://192.168.0.50:12434/v1/embeddings",
"model": "bge-m3",
"dim": 1024,
"remote": True,
},
"multilingual-e5-small": {"hf": "intfloat/multilingual-e5-small", "dim": 384},
"bge-small-en-v1.5": {"hf": "BAAI/bge-small-en-v1.5", "dim": 384},
"all-MiniLM-L6-v2": {"hf": "sentence-transformers/all-MiniLM-L6-v2", "dim": 384},
@ -64,6 +74,11 @@ def _resolve_model_key(model_key: str | None = None) -> str:
return DEFAULT_MODEL_KEY
def _is_remote_model(model_key: str) -> bool:
    """Report whether ``model_key`` names a registry entry flagged as remote.

    Keys that are absent from ``MODEL_REGISTRY`` and entries that carry no
    ``"remote"`` field both yield ``False``.
    """
    registry_entry = MODEL_REGISTRY.get(model_key, {})
    return registry_entry.get("remote", False)
_MODEL_LOCK = threading.Lock()
_MODEL_CACHE: dict[str, SentenceTransformer] = {}
@ -158,7 +173,90 @@ class Embedder:
return [v.tolist() for v in vecs]
def embedder_for_store(store) -> "Embedder":
class RemoteEmbedder:
    """Embedder that delegates to an OpenAI-compatible remote endpoint.

    Used when the local CPU cannot run sentence-transformers (e.g. no AVX).
    Sends text to a remote embedding server and returns L2-normalised
    vectors.

    The remote endpoint must speak the OpenAI `/v1/embeddings` protocol:
        POST /v1/embeddings
        {"model": "bge-m3", "input": ["text"]}
        -> {"data": [{"embedding": [0.0, ...], ...}]}

    Attributes set by ``__init__``:
        model_key: registry key, or ``"custom-remote"`` for an explicit endpoint.
        DIM: output dimension (from the registry, or probed from the server).
    """

    def __init__(
        self,
        model_key: str | None = None,
        *,
        endpoint: str | None = None,
        model_name: str | None = None,
    ) -> None:
        """Configure the embedder from a registry key or an explicit endpoint.

        Args:
            model_key: key into ``MODEL_REGISTRY``; when present there, its
                ``endpoint`` / ``model`` / ``dim`` fields are used.
            endpoint: full URL of the embeddings endpoint (custom mode).
            model_name: model identifier sent in the request (custom mode).

        Raises:
            ValueError: neither a registry key nor endpoint + model_name given.
        """
        if model_key is not None and model_key in MODEL_REGISTRY:
            spec = MODEL_REGISTRY[model_key]
            resolved_key = model_key
            resolved_endpoint = spec["endpoint"]
            resolved_model = spec["model"]
            known_dim = int(spec["dim"])
        elif endpoint is not None and model_name is not None:
            resolved_key = "custom-remote"
            resolved_endpoint = endpoint
            resolved_model = model_name
            known_dim = None
        else:
            raise ValueError(
                "RemoteEmbedder requires model_key from MODEL_REGISTRY "
                "or explicit endpoint + model_name"
            )
        self.model_key: str = resolved_key
        self._endpoint: str = resolved_endpoint
        self._model_name: str = resolved_model
        # The client must exist BEFORE any probe call: the probe posts
        # through it. Creating it last made the custom-endpoint path fail
        # with AttributeError.
        self._client = httpx.Client(timeout=30.0)
        if known_dim is None:
            try:
                known_dim = self._probe_dim()
            except Exception:
                # Do not leak the connection pool when the probe fails.
                self._client.close()
                raise
        self.DIM: int = known_dim

    def close(self) -> None:
        """Release the underlying HTTP client and its connections."""
        self._client.close()

    @staticmethod
    def _l2_normalise(vec: list[float]) -> list[float]:
        """Return ``vec`` scaled to unit L2 norm; a zero vector is returned as-is."""
        norm = (sum(x * x for x in vec)) ** 0.5
        if norm > 0:
            return [x / norm for x in vec]
        return vec

    def _request_embeddings(self, inputs: list[str]) -> list[list[float]]:
        """POST ``inputs`` to the endpoint and return raw vectors in input order.

        Response items are ordered by their ``index`` field when the server
        supplies one; items without it keep their position in the response.
        Raises whatever ``raise_for_status`` raises on a non-success reply.
        """
        resp = self._client.post(
            self._endpoint,
            json={"model": self._model_name, "input": inputs},
        )
        resp.raise_for_status()
        items = resp.json()["data"]
        ordered = sorted(
            enumerate(items),
            key=lambda pair: pair[1].get("index", pair[0]),
        )
        return [item["embedding"] for _, item in ordered]

    def _probe_dim(self) -> int:
        """Make a single embedding call to discover the output dimension."""
        return len(self._request_embeddings(["probe"])[0])

    def embed(self, text: str) -> list[float]:
        """Encode a single string. Returns L2-normalised vector."""
        # Normalise unconditionally: harmless if the server already did.
        return self._l2_normalise(self._request_embeddings([text])[0])

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Batch-encode preserving input order.

        An empty ``texts`` returns ``[]`` without contacting the server.
        """
        if not texts:
            return []
        return [self._l2_normalise(vec) for vec in self._request_embeddings(texts)]
def embedder_for_store(store) -> "Embedder | RemoteEmbedder":
"""Store-aware Embedder factory. Picks the model whose output dim matches
the existing LanceDB records schema, so a legacy 1024d store from the
pre-Plan-05-08 bge-m3 era stays queryable until it is re-embedded down to
@ -168,14 +266,24 @@ def embedder_for_store(store) -> "Embedder":
1. If store.embed_dim has an exact match in MODEL_REGISTRY, prefer the
model whose logical key name indicates the canonical model at that dim
(bge-small-en-v1.5 for 384d default; bge-m3 for legacy/opt-in 1024d).
2. Otherwise fall through to the env/registry default via Embedder().
2. If IAI_MCP_EMBED_MODEL points to a remote model, use RemoteEmbedder.
3. Otherwise fall through to the env/registry default via Embedder().
This decouples runtime model selection from a global env var so a single
process can operate multiple stores at different dims while the migration
from a legacy 1024d store down to 384d completes.
"""
target_dim = getattr(store, "embed_dim", None)
env_key = os.environ.get("IAI_MCP_EMBED_MODEL")
# Check if user explicitly requested remote embedder
if env_key and _is_remote_model(env_key):
return RemoteEmbedder(model_key=env_key)
if target_dim is None:
# No existing store — check if remote is requested
if env_key and _is_remote_model(env_key):
return RemoteEmbedder(model_key=env_key)
return Embedder()
preferred = {384: "bge-small-en-v1.5", 1024: "bge-m3"}
key = preferred.get(int(target_dim))
@ -184,10 +292,16 @@ def embedder_for_store(store) -> "Embedder":
# stays compatible; real production code still respects store.embed_dim.
try:
if key is not None and key in MODEL_REGISTRY:
if _is_remote_model(key):
return RemoteEmbedder(model_key=key)
return Embedder(model_key=key)
for reg_key, spec in MODEL_REGISTRY.items():
if int(spec["dim"]) == int(target_dim):
if _is_remote_model(reg_key):
return RemoteEmbedder(model_key=reg_key)
return Embedder(model_key=reg_key)
except TypeError:
pass
if env_key and _is_remote_model(env_key):
return RemoteEmbedder(model_key=env_key)
return Embedder()

View file

@ -92,17 +92,17 @@ def write_event(
data_plain = json.dumps(data)
ad = str(event_id).encode("ascii")
data_ct = encrypt_field(data_plain, store._key(), associated_data=ad)
row = {
"id": str(event_id),
"kind": kind,
"severity": severity or "",
"domain": domain or "",
"ts": datetime.now(timezone.utc),
"data_json": data_ct,
"session_id": session_id,
"source_ids_json": json.dumps([str(x) for x in (source_ids or [])]),
}
store.db.open_table(EVENTS_TABLE).add([row])
ts = datetime.now(timezone.utc)
store.events_add(
event_id=event_id,
kind=kind,
severity=severity or "",
domain=domain or "",
ts=ts,
data_json=data_ct,
session_id=session_id,
source_ids_json=json.dumps([str(x) for x in (source_ids or [])]),
)
return event_id
@ -132,27 +132,12 @@ def query_events(
Returns a list of dicts with keys: id, kind, severity, domain, ts, data,
session_id, source_ids. data and source_ids are decoded from JSON.
"""
tbl = store.db.open_table(EVENTS_TABLE)
df = tbl.to_pandas()
if df.empty:
return []
if kind is not None:
df = df[df["kind"] == kind]
if severity is not None:
df = df[df["severity"] == severity]
if since is not None:
# Ensure tz-aware comparison
since_cmp = since if since.tzinfo is not None else since.replace(tzinfo=timezone.utc)
# Pandas Timestamp compares naturally with tz-aware datetimes
df = df[df["ts"] >= since_cmp]
if df.empty:
return []
df = df.sort_values("ts", ascending=False).head(limit)
rows = store.events_query(kind=kind, since=since, severity=severity, limit=limit)
out: list[dict] = []
for _, row in df.iterrows():
for row in rows:
# decrypt data_json when it carries the iai:enc:v1: prefix.
# Pre-02-08 rows stay plaintext; migration rewrites them lazily.
raw_data = row["data_json"] or "{}"
raw_data = row["data"] if isinstance(row.get("data"), str) else json.dumps(row.get("data", {}))
if is_encrypted(raw_data):
ad = str(row["id"]).encode("ascii")
try:
@ -165,20 +150,14 @@ def query_events(
data = json.loads(raw_data)
except (TypeError, json.JSONDecodeError):
data = {}
try:
source_ids = json.loads(row["source_ids_json"] or "[]")
except (TypeError, json.JSONDecodeError):
source_ids = []
out.append(
{
"id": row["id"],
"kind": row["kind"],
"severity": row["severity"] or None,
"domain": row["domain"] or None,
"ts": row["ts"],
"data": data,
"session_id": row["session_id"],
"source_ids": source_ids,
}
)
out.append({
"id": row["id"],
"kind": row["kind"],
"severity": row["severity"] or None,
"domain": row["domain"] or None,
"ts": row["ts"],
"data": data,
"session_id": row["session_id"],
"source_ids": row["source_ids"],
})
return out

View file

@ -61,10 +61,7 @@ from pathlib import Path
from typing import Callable, Optional
from uuid import UUID
import pyarrow as pa
from iai_mcp.crypto import encrypt_field, is_encrypted
from iai_mcp.embed import Embedder
from iai_mcp.events import write_event
from iai_mcp.store import (
EVENTS_TABLE,
@ -123,7 +120,7 @@ def _detect_language(text: str) -> str:
def migrate_v1_to_v2(
store: MemoryStore,
embedder: Optional[Embedder] = None,
embedder: Optional["Embedder"] = None,
dry_run: bool = False,
progress: Optional[Callable[[int, int], None]] = None,
) -> dict:
@ -237,7 +234,7 @@ def migrate_v1_to_v2(
}
def _records_schema_at_dim(dim: int) -> pa.Schema:
def _records_schema_at_dim(dim: int) -> "pa.Schema":
"""Build the records-table Arrow schema at an explicit embedding dim.
Mirrors `MemoryStore._ensure_tables` lines 249-281 byte-for-byte except
@ -247,6 +244,7 @@ def _records_schema_at_dim(dim: int) -> pa.Schema:
is not parameterised on dim. Plan 07.11-03 / file-disjoint
constraint forbids store.py changes; inlining is the conservative path.
"""
import pyarrow as pa
return pa.schema(
[
("id", pa.string()),

View file

@ -0,0 +1,202 @@
"""Migration script: move data from 5 Qdrant collections → 2 collections.
Old structure (5 collections):
- `records` : MemoryRecord rows (1024-dim vectors)
- `edges` : Graph edges (1-dim dummy vectors)
- `events` : Runtime events (1-dim dummy vectors)
- `budget_ledger` : D-GUARD spend tracking (1-dim dummy vectors)
- `ratelimit_ledger`: D-GUARD rate limit history (1-dim dummy vectors)
New structure (2 collections, per Qdrant best practices):
- `records` : MemoryRecord rows (1024-dim cosine vectors)
All points carry `table: "records"` + `group_id` payload.
- `metadata` : Payload-only (no vectors) containing edges, events,
budget_ledger, ratelimit_ledger.
Each point carries `table` + `group_id` payload.
Both collections use keyword indexes on `table` for co-located storage.
Usage:
python -m iai_mcp.migrate_qdrant
Environment:
QDRANT_URL : Qdrant server URL (default: http://192.168.0.22:6333)
QDRANT_API_KEY: Qdrant API key
"""
from __future__ import annotations
import base64
import json
import os
import sys
import time
from datetime import datetime, timezone
from uuid import UUID
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from qdrant_client.http.exceptions import UnexpectedResponse
# --------------------------------------------------------------------------- env
QDRANT_URL = os.environ.get("QDRANT_URL", "http://192.168.0.22:6333")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
GROUP_ID = os.environ.get("IAI_MCP_USER_ID", "default")
def setup_client() -> QdrantClient:
    """Build a ``QdrantClient`` from the module-level connection settings.

    Passes ``QDRANT_URL`` and ``QDRANT_API_KEY`` through unchanged and uses a
    timeout of 30.
    """
    connection_settings = {
        "url": QDRANT_URL,
        "api_key": QDRANT_API_KEY,
        "timeout": 30,
    }
    return QdrantClient(**connection_settings)
def ensure_new_collections(client: QdrantClient) -> None:
    """Create the 2 target collections and their payload indexes if missing.

    ``records`` is created with 1024-dim cosine vectors; ``metadata`` is
    created without a vector configuration. Keyword payload indexes on
    ``table`` and ``group_id`` are then requested on both collections.

    Index creation stays best-effort, but a failure is now reported instead
    of being silently discarded, so a genuine error (auth, connectivity) is
    visible in the migration log.
    """
    # Collection 1: records (vectors)
    # NOTE(review): any get_collection failure is treated as "missing";
    # confirm that is acceptable for auth/network errors.
    try:
        client.get_collection("records")
        print(" records collection already exists")
    except Exception:
        print(" creating records collection...")
        client.create_collection(
            collection_name="records",
            vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
        )
    # Collection 2: metadata (payload-only)
    try:
        client.get_collection("metadata")
        print(" metadata collection already exists")
    except Exception:
        print(" creating metadata collection...")
        client.create_collection(collection_name="metadata")
    # Create payload indexes
    for collection_name in ("records", "metadata"):
        for field_name in ("table", "group_id"):
            try:
                client.create_payload_index(
                    collection_name=collection_name,
                    field_name=field_name,
                    field_schema="keyword",
                )
            except Exception as exc:
                # Best-effort: keep going, but leave a trace of what failed.
                print(
                    f" note: payload index {collection_name}.{field_name} "
                    f"not created: {exc}"
                )
def scroll_all(client: QdrantClient, collection_name: str, batch_size: int = 1000) -> list:
    """Collect every point of ``collection_name`` into one list.

    Pages through ``client.scroll`` with ``batch_size`` points per request,
    asking for payloads and vectors, and follows the returned offset until
    the server reports ``None`` as the next offset.
    """
    collected: list = []
    cursor = None
    has_more = True
    while has_more:
        page, cursor = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=cursor,
            with_payload=True,
            with_vectors=True,
        )
        collected += page
        # A None cursor is the end-of-collection signal.
        has_more = cursor is not None
    return collected
def migrate_records(client: QdrantClient, batch_size: int = 500) -> int:
    """Re-write every point of `records` with `table` / `group_id` payload.

    Reads all points of the `records` collection, adds ``table="records"``
    and ``group_id=GROUP_ID`` to each payload, and upserts them back into
    `records` under the same ids.

    Args:
        client: connected Qdrant client.
        batch_size: maximum number of points per upsert request. Upserting
            in bounded chunks avoids sending the whole collection in one
            request; values below 1 are treated as 1.

    Returns:
        Number of points upserted (0 when the collection is empty).
    """
    print("\nMigrating records...")
    old_points = scroll_all(client, "records")
    if not old_points:
        print(" no records to migrate")
        return 0
    new_points = [
        PointStruct(
            id=pt.id,
            # NOTE(review): a point without a vector is sent with [] as
            # before; presumably a 1024-dim collection rejects that -- confirm.
            vector=list(pt.vector) if pt.vector else [],
            # Build a fresh payload instead of mutating the scrolled point.
            payload={**(pt.payload or {}), "table": "records", "group_id": GROUP_ID},
        )
        for pt in old_points
    ]
    step = max(1, batch_size)
    for start in range(0, len(new_points), step):
        client.upsert(
            collection_name="records",
            points=new_points[start:start + step],
        )
    print(f" migrated {len(new_points)} records")
    return len(new_points)
def migrate_metadata(client: QdrantClient, table_name: str, batch_size: int = 500) -> int:
    """Copy every point of collection ``table_name`` into `metadata`.

    Each copied point keeps its id and payload, gains ``table=table_name``
    and ``group_id=GROUP_ID``, and is written without a vector.

    Args:
        client: connected Qdrant client.
        table_name: name of the source collection.
        batch_size: maximum number of points per upsert request. Upserting
            in bounded chunks avoids sending the whole collection in one
            request; values below 1 are treated as 1.

    Returns:
        Number of points upserted (0 when the source collection is empty).
    """
    print(f"\nMigrating {table_name}...")
    old_points = scroll_all(client, table_name)
    if not old_points:
        print(f" no {table_name} points to migrate")
        return 0
    # NOTE(review): ids are reused as-is; if two source collections share an
    # id, the later upsert into `metadata` overwrites the earlier one --
    # confirm ids are unique across edges/events/ledgers.
    new_points = [
        PointStruct(
            id=pt.id,
            vector={},  # payload-only (empty dict for no-vector collection)
            # Build a fresh payload instead of mutating the scrolled point.
            payload={**(pt.payload or {}), "table": table_name, "group_id": GROUP_ID},
        )
        for pt in old_points
    ]
    step = max(1, batch_size)
    for start in range(0, len(new_points), step):
        client.upsert(
            collection_name="metadata",
            points=new_points[start:start + step],
        )
    print(f" migrated {len(new_points)} {table_name} points")
    return len(new_points)
def drop_old_collections(client: QdrantClient) -> None:
    """Delete the four source collections that were folded into `metadata`.

    `records` is not touched. A failure to delete one collection is printed
    as a warning and does not stop the remaining deletions.
    """
    for col_name in ("edges", "events", "budget_ledger", "ratelimit_ledger"):
        try:
            client.delete_collection(collection_name=col_name, timeout=30)
        except Exception as e:
            print(f" warning: could not drop {col_name}: {e}")
        else:
            print(f" dropped {col_name} collection")
def main() -> int:
    """Run the migration and return a process exit code.

    Steps: ensure the target collections exist, copy the data, verify the
    destination point counts, and only then drop the old collections.

    The old collections are the only copy of the source data, so they are
    dropped only after verification succeeds. If the counts cannot be read,
    or are lower than the number of points just migrated, nothing is
    dropped and 1 is returned.

    Returns:
        0 on success, 1 when verification failed.
    """
    print("Qdrant migration: 5 collections → 2 collections")
    print(f" QDRANT_URL: {QDRANT_URL}")
    print(f" GROUP_ID: {GROUP_ID}")
    client = setup_client()

    print("\nStep 1: Ensure new collections exist...")
    ensure_new_collections(client)

    print("\nStep 2: Migrate data...")
    t0 = time.time()
    records_migrated = migrate_records(client)
    metadata_migrated = sum(
        migrate_metadata(client, table_name)
        for table_name in ("edges", "events", "budget_ledger", "ratelimit_ledger")
    )
    total = records_migrated + metadata_migrated
    print(f"\n total migrated: {total} points in {time.time() - t0:.1f}s")

    print("\nStep 3: Verify...")
    try:
        # exact=True requests a precise count rather than an estimate.
        rec_count = client.count(collection_name="records", exact=True).count
        meta_count = client.count(collection_name="metadata", exact=True).count
    except Exception as e:
        print(f" verification failed: {e}")
        print(" old collections were NOT dropped")
        return 1
    print(f" records collection: {rec_count} points")
    print(f" metadata collection: {meta_count} points")
    if rec_count < records_migrated or meta_count < metadata_migrated:
        # Fewer points than were written means something was lost or
        # overwritten; keep the sources so the run can be repeated.
        print(" verification failed: destination holds fewer points than migrated")
        print(" old collections were NOT dropped")
        return 1

    print("\nStep 4: Drop old collections...")
    drop_old_collections(client)
    print("\nMigration complete!")
    return 0
if __name__ == "__main__":
sys.exit(main())

1321
src/iai_mcp/qdrant_store.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -47,8 +47,7 @@ from collections.abc import Sequence
from typing import Callable
from uuid import UUID
import lancedb
import pyarrow as pa
# W5: cached AESGCM cipher per store; reuse safe per
# https://cryptography.io/en/latest/hazmat/primitives/aead/ — single AESGCM
@ -209,6 +208,7 @@ class MemoryStore:
connect_kwargs: dict[str, object] = {}
if read_consistency_interval is not None:
connect_kwargs["read_consistency_interval"] = read_consistency_interval
import lancedb
self.db = lancedb.connect(str(self.root / "lancedb"), **connect_kwargs)
# Resolve the embedding dimension once so records table + insert guard agree.
self._embed_dim: int = _resolve_embed_dim()
@ -1596,3 +1596,91 @@ class MemoryStore:
if language == "__LEGACY_EMPTY__":
rec.language = "" # post-construction: signal to migration path
return rec
# ---------------------------------------------------------- events
def events_add(self, event_id: UUID, kind: str, severity: str, domain: str,
               ts: datetime, data_json: str, session_id: str, source_ids_json: str) -> None:
    """Append one event row to the events table.

    ``event_id`` is stored as its string form; every other argument is
    written to the column of the same name exactly as given.
    """
    record = dict(
        id=str(event_id),
        kind=kind,
        severity=severity,
        domain=domain,
        ts=ts,
        data_json=data_json,
        session_id=session_id,
        source_ids_json=source_ids_json,
    )
    events_table = self.db.open_table(EVENTS_TABLE)
    events_table.add([record])
def events_query(self, kind: str | None = None, since: datetime | None = None,
                 severity: str | None = None, limit: int = 100) -> list[dict]:
    """Query events matching filters, newest first.

    Args:
        kind: keep only rows whose ``kind`` equals this value.
        since: keep only rows with ``ts >= since``; a naive datetime is
            interpreted as UTC.
        severity: keep only rows whose ``severity`` equals this value.
        limit: maximum number of rows returned.

    Returns:
        A list of dicts with keys id, kind, severity, domain, ts, data,
        session_id, source_ids. ``data`` is the decoded JSON of
        ``data_json``; when ``data_json`` is a string that is not valid
        JSON (for example an encrypted payload) the raw string is returned
        unchanged so the caller can still decrypt it, instead of being
        replaced by ``{}``.
    """
    tbl = self.db.open_table(EVENTS_TABLE)
    df = tbl.to_pandas()
    if df.empty:
        return []
    if kind is not None:
        df = df[df["kind"] == kind]
    if severity is not None:
        df = df[df["severity"] == severity]
    if since is not None:
        # Ensure a tz-aware comparison against the stored timestamps.
        since_cmp = since if since.tzinfo is not None else since.replace(tzinfo=timezone.utc)
        df = df[df["ts"] >= since_cmp]
    if df.empty:
        return []
    df = df.sort_values("ts", ascending=False).head(limit)
    out: list[dict] = []
    for _, row in df.iterrows():
        raw_data = row["data_json"] or "{}"
        try:
            data = json.loads(raw_data)
        except TypeError:
            data = {}
        except json.JSONDecodeError:
            # Not JSON: pass the stored string through untouched rather
            # than discarding it, so ciphertext survives for decryption.
            data = raw_data
        try:
            source_ids = json.loads(row["source_ids_json"] or "[]")
        except (TypeError, json.JSONDecodeError):
            source_ids = []
        out.append({
            "id": row["id"],
            "kind": row["kind"],
            "severity": row["severity"] or None,
            "domain": row["domain"] or None,
            "ts": row["ts"],
            "data": data,
            "session_id": row["session_id"],
            "source_ids": source_ids,
        })
    return out
# --------------------------------------------------------------------------- Qdrant backend
def _use_qdrant() -> bool:
"""Check if Qdrant backend is configured via environment."""
return bool(os.environ.get("QDRANT_URL"))
def get_store(
    path: Path | str | None = None,
    user_id: str = "default",
    read_consistency_interval: timedelta | None = None,
) -> "MemoryStore | QdrantStore":
    """Factory: build the store backend selected by the environment.

    Returns a ``QdrantStore`` when ``_use_qdrant()`` is true, otherwise a
    ``MemoryStore`` (LanceDB), the legacy/local path. All three arguments
    are forwarded unchanged to whichever constructor is used.
    """
    store_kwargs = {
        "path": path,
        "user_id": user_id,
        "read_consistency_interval": read_consistency_interval,
    }
    if not _use_qdrant():
        return MemoryStore(**store_kwargs)
    # Imported only on the Qdrant path, as in the original layout.
    from iai_mcp.qdrant_store import QdrantStore
    return QdrantStore(**store_kwargs)