Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
221
tests/test_centrality_cache.py
Normal file
221
tests/test_centrality_cache.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
"""Plan 05-13 RED scaffold — cached centrality on graph nodes.
|
||||
|
||||
``build_runtime_graph`` must compute betweenness centrality ONCE and
|
||||
attach it as the ``centrality`` NetworkX node attribute so the rank
|
||||
stage can read it O(1) instead of recomputing ``graph.centrality()``
|
||||
on every recall. The cache file must round-trip the per-node
|
||||
centrality alongside the rest of the node payload so a cold-start
|
||||
rebuild hits the cache and the pipeline-hot-path stays allocation-free.
|
||||
|
||||
Contracts:
|
||||
C1 — every graph node has a ``centrality`` float attribute after
|
||||
``build_runtime_graph`` returns.
|
||||
C2 — runtime_graph_cache round-trips the ``centrality`` value per node
|
||||
(save + try_load preserves the exact float).
|
||||
C3 — when a node is missing ``centrality`` (pre-05-13 graph / race),
|
||||
recall_for_response falls back to inline computation without crashing.
|
||||
C4 — CACHE_VERSION bumped from "05-12-v1" to "05-13-v1"; legacy cache
|
||||
files are invalidated cleanly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import retrieve, runtime_graph_cache
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
|
||||
import keyring as _keyring
|
||||
|
||||
fake: dict[tuple[str, str], str] = {}
|
||||
monkeypatch.setattr(_keyring, "get_password", lambda s, u: fake.get((s, u)))
|
||||
monkeypatch.setattr(
|
||||
_keyring, "set_password", lambda s, u, p: fake.__setitem__((s, u), p)
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
_keyring, "delete_password", lambda s, u: fake.pop((s, u), None)
|
||||
)
|
||||
yield fake
|
||||
|
||||
|
||||
def _make_record(store: MemoryStore, text: str, seed: int) -> MemoryRecord:
|
||||
import numpy as np
|
||||
rng = np.random.default_rng(seed)
|
||||
v = rng.standard_normal(store.embed_dim).astype(np.float32)
|
||||
v /= float(np.linalg.norm(v)) or 1.0
|
||||
now = datetime.now(timezone.utc)
|
||||
return MemoryRecord(
|
||||
id=uuid4(),
|
||||
tier="episodic",
|
||||
literal_surface=text,
|
||||
aaak_index="",
|
||||
embedding=v.tolist(),
|
||||
community_id=None,
|
||||
centrality=0.0,
|
||||
detail_level=2,
|
||||
pinned=False,
|
||||
stability=0.0,
|
||||
difficulty=0.0,
|
||||
last_reviewed=None,
|
||||
never_decay=False,
|
||||
never_merge=False,
|
||||
provenance=[],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=["t"],
|
||||
language="en",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def seeded_store(tmp_path: Path) -> MemoryStore:
|
||||
store = MemoryStore(path=tmp_path / "lancedb")
|
||||
store.root = tmp_path
|
||||
# Seed enough records to produce a non-trivial graph so betweenness > 0
|
||||
# on at least some nodes.
|
||||
for i in range(15):
|
||||
store.insert(_make_record(store, f"fact-{i}", i + 1))
|
||||
# Create some edges so betweenness has something to measure.
|
||||
records = list(store.all_records())
|
||||
ids = [r.id for r in records]
|
||||
pairs = [(ids[i], ids[i + 1]) for i in range(len(ids) - 1)]
|
||||
pairs += [(ids[0], ids[5]), (ids[2], ids[10])]
|
||||
store.boost_edges(pairs, delta=0.5)
|
||||
return store
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C1
|
||||
|
||||
|
||||
def test_C1_every_node_has_centrality_attr(seeded_store):
|
||||
"""After build_runtime_graph, every node carries a 'centrality' float attr."""
|
||||
graph, _a, _rc = retrieve.build_runtime_graph(seeded_store)
|
||||
assert len(graph._nx.nodes) > 0
|
||||
for nid in graph._nx.nodes:
|
||||
node = graph._nx.nodes[nid]
|
||||
assert "centrality" in node, f"node {nid} missing centrality attr"
|
||||
assert isinstance(node["centrality"], float), (
|
||||
f"centrality on {nid} must be float, got {type(node['centrality'])}"
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C2
|
||||
|
||||
|
||||
def test_C2_cache_round_trips_centrality(seeded_store):
|
||||
"""save + try_load preserves per-node centrality exactly."""
|
||||
graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)
|
||||
|
||||
# Snapshot centrality from the live graph.
|
||||
live_cent = {
|
||||
nid: float(graph._nx.nodes[nid]["centrality"])
|
||||
for nid in graph._nx.nodes
|
||||
}
|
||||
|
||||
# Force a fresh save by invalidating then re-running build.
|
||||
runtime_graph_cache.invalidate(seeded_store)
|
||||
graph2, _a2, _rc2 = retrieve.build_runtime_graph(seeded_store)
|
||||
|
||||
# Now cache should be populated. try_load should give us node_payload
|
||||
# with centrality baked in.
|
||||
cached = runtime_graph_cache.try_load(seeded_store)
|
||||
assert cached is not None, "cache should be populated after build"
|
||||
# try_load returns 4-tuple (max_degree appended).
|
||||
_assignment, _rich_club, node_payload, _max_degree = cached
|
||||
assert node_payload is not None and len(node_payload) > 0
|
||||
|
||||
for nid, live in live_cent.items():
|
||||
payload = node_payload.get(nid)
|
||||
assert payload is not None, f"missing payload for {nid}"
|
||||
assert "centrality" in payload, f"payload {nid} missing centrality"
|
||||
# Exact-float equality — JSON round-trip preserves float64.
|
||||
assert abs(payload["centrality"] - live) < 1e-9, (
|
||||
f"centrality drift on {nid}: cache={payload['centrality']} "
|
||||
f"live={live}"
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C3
|
||||
|
||||
|
||||
def test_C3_missing_centrality_fallback_inline(seeded_store):
|
||||
"""Graph with missing 'centrality' on nodes must not crash rank stage."""
|
||||
from iai_mcp import pipeline
|
||||
|
||||
class _E:
|
||||
DIM = seeded_store.embed_dim
|
||||
DEFAULT_DIM = seeded_store.embed_dim
|
||||
DEFAULT_MODEL_KEY = "t"
|
||||
|
||||
def embed(self, t):
|
||||
import numpy as np
|
||||
import hashlib
|
||||
rng = np.random.default_rng(
|
||||
int(hashlib.sha256(t.encode()).hexdigest()[:16], 16)
|
||||
)
|
||||
v = rng.standard_normal(self.DIM).astype(np.float32)
|
||||
v /= float(np.linalg.norm(v)) or 1.0
|
||||
return v.tolist()
|
||||
|
||||
graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)
|
||||
# Strip centrality from all nodes — simulates a pre-05-13 graph shape
|
||||
# or a race in _graph_sync_hook.
|
||||
for nid in list(graph._nx.nodes):
|
||||
graph._nx.nodes[nid].pop("centrality", None)
|
||||
|
||||
resp = pipeline.recall_for_response(
|
||||
store=seeded_store, graph=graph, assignment=assignment,
|
||||
rich_club=rich_club, embedder=_E(), cue="fact-3",
|
||||
session_id="t-C3", budget_tokens=4000,
|
||||
)
|
||||
# No crash; still returns hits.
|
||||
assert resp is not None
|
||||
assert isinstance(resp.hits, list)
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C4
|
||||
|
||||
|
||||
def test_C4_cache_version_bumped_to_05_13_v1():
|
||||
"""CACHE_VERSION moved forward over the cache-shape evolution (05-12-v1
|
||||
-> 05-13-v1 -> 06-02-v1 -> 07-09-v3, with W3 / wrapping
|
||||
the file in AES-256-GCM). Legacy files invalidate cleanly on version
|
||||
mismatch (and the legacy plaintext-shape "06-02-v1" lazy-migrates to
|
||||
the encrypted shape on first warm-start under 07.9).
|
||||
"""
|
||||
assert runtime_graph_cache.CACHE_VERSION == "07-09-v3"
|
||||
|
||||
|
||||
def test_C4_legacy_cache_invalidated(seeded_store, tmp_path: Path):
|
||||
"""A cache file written with CACHE_VERSION=05-12-v1 must NOT load.
|
||||
|
||||
W3: the on-disk format is now AES-256-GCM-wrapped. Decrypt
|
||||
the file, mutate cache_version, re-encrypt, then assert try_load
|
||||
rejects the stale version cleanly.
|
||||
"""
|
||||
from iai_mcp.crypto import decrypt_field, encrypt_field
|
||||
|
||||
# First build the graph so we know the path.
|
||||
graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)
|
||||
cache_path = tmp_path / "runtime_graph_cache.json"
|
||||
assert cache_path.exists(), "cache not created by build_runtime_graph"
|
||||
|
||||
# Decrypt → mutate version → re-encrypt round-trip.
|
||||
key = runtime_graph_cache._cache_encryption_key(seeded_store)
|
||||
raw_text = cache_path.read_text(encoding="utf-8")
|
||||
plaintext = decrypt_field(raw_text, key, runtime_graph_cache._CACHE_AAD)
|
||||
raw = json.loads(plaintext)
|
||||
raw["cache_version"] = "05-12-v1"
|
||||
new_ct = encrypt_field(json.dumps(raw), key, runtime_graph_cache._CACHE_AAD)
|
||||
cache_path.write_text(new_ct, encoding="ascii")
|
||||
|
||||
# try_load must reject it (legacy version stamp).
|
||||
assert runtime_graph_cache.try_load(seeded_store) is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue