551 lines
20 KiB
Python
551 lines
20 KiB
Python
|
|
"""Plan 05-09 (P4.A) — runtime_graph_cache tests.
|
|||
|
|
|
|||
|
|
The cache persists the Leiden community assignment + rich-club node
list next to the LanceDB store. On subsequent ``build_runtime_graph``
calls, a valid cache skips the Leiden + rich-club computations; an
invalid cache falls through to a clean rebuild.

Covered contracts:

1. save() creates the JSON file
2. try_load on unchanged store returns the cached pair
3. adding a record invalidates the cache (key mismatch)
4. CACHE_VERSION mismatch triggers rebuild (forward-compat fence)
5. corrupt JSON falls through to a clean None
6. absent file returns None cleanly
7. build_runtime_graph on second call avoids detect_communities
8. build_runtime_graph on store change triggers detect_communities
9. atomic write: interrupted save leaves old cache intact
10. invalidate() deletes the cache file

Later additions in this module also cover embed_dim invalidation, the
oversize-payload drop path, and the encrypted (ciphertext) sidecar format,
including lazy migration of a legacy plaintext file.
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
from pathlib import Path
|
|||
|
|
from unittest import mock
|
|||
|
|
from uuid import UUID, uuid4
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
from iai_mcp import retrieve, runtime_graph_cache
|
|||
|
|
from iai_mcp.community import CommunityAssignment
|
|||
|
|
from iai_mcp.store import MemoryStore
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- fixtures
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Replace the ``keyring`` entry points with an in-memory dict.

    Applied automatically to every test in this module. ``get_password``,
    ``set_password`` and ``delete_password`` are patched to read and write a
    dict keyed by ``(service, username)``; that dict is what the fixture
    yields. ``monkeypatch`` restores the real functions at teardown.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    # The stubs keep the short positional names (s, u, p) of the original
    # replacements so keyword-style callers, if any, behave the same.
    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        # Missing entries are ignored rather than raising.
        return fake.pop((s, u), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(_keyring, attr_name, stub)

    yield fake
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture
|
|||
|
|
def store(tmp_path: Path) -> MemoryStore:
|
|||
|
|
"""Fresh MemoryStore in tmp_path/lancedb with the cache file path set
|
|||
|
|
to tmp_path/runtime_graph_cache.json."""
|
|||
|
|
s = MemoryStore(path=tmp_path / "lancedb")
|
|||
|
|
# Override root so the cache file lives in tmp_path, not the real store
|
|||
|
|
# root (which would be tmp_path/lancedb — still fine, but explicit).
|
|||
|
|
s.root = tmp_path
|
|||
|
|
return s
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _read_decrypted_cache(store: MemoryStore, path: Path) -> dict:
    """Return the JSON dict stored in the cache sidecar at ``path``.

    Phase 07.9 W3 helper. A file starting with the ``iai:enc:v1:`` envelope
    prefix is decrypted with the store key and the cache AAD before parsing.
    Anything else is treated as legacy or hand-written plaintext JSON and is
    parsed as-is.
    """
    text = path.read_text(encoding="utf-8")
    if text.startswith("iai:enc:v1:"):
        # Imported lazily so the plaintext path never touches the crypto module.
        from iai_mcp.crypto import decrypt_field

        text = decrypt_field(
            text,
            store._key(),
            runtime_graph_cache._CACHE_AAD,
        )
    return json.loads(text)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _write_encrypted_cache(store: MemoryStore, path: Path, data: dict) -> None:
    """Serialise ``data`` to JSON, encrypt it, and overwrite ``path``.

    Counterpart of ``_read_decrypted_cache``: it uses the same store key and
    cache AAD, so a dict edited by a test can be written back in the
    ciphertext sidecar format.
    """
    from iai_mcp.crypto import encrypt_field

    envelope = encrypt_field(
        json.dumps(data),
        store._key(),
        runtime_graph_cache._CACHE_AAD,
    )
    path.write_text(envelope, encoding="ascii")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _make_assignment(n_communities: int = 2) -> CommunityAssignment:
    """Build a small synthetic ``CommunityAssignment`` with random UUIDs.

    Creates ``n_communities`` community ids and three node ids per community.
    Nodes are assigned to communities in consecutive runs of three, every
    community gets the same 3-element centroid, and each community's
    ``mid_regions`` entry lists its own three nodes.
    """
    community_ids = [uuid4() for _ in range(n_communities)]
    node_ids = [uuid4() for _ in range(n_communities * 3)]

    # Node k belongs to community k // 3.
    membership = {
        node: community_ids[position // 3]
        for position, node in enumerate(node_ids)
    }
    centroids = {community: [0.1, 0.2, 0.3] for community in community_ids}
    regions = {
        community: node_ids[index * 3:(index + 1) * 3]
        for index, community in enumerate(community_ids)
    }

    return CommunityAssignment(
        node_to_community=membership,
        community_centroids=centroids,
        modularity=0.42,
        backend="leiden-networkx",
        top_communities=community_ids,
        mid_regions=regions,
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_creates_json_file(store):
    """Contract 1: ``save`` succeeds and leaves an encrypted sidecar on disk."""
    saved = runtime_graph_cache.save(
        store,
        _make_assignment(),
        [uuid4() for _ in range(5)],
    )
    assert saved is True

    cache_file = runtime_graph_cache._cache_path(store)
    assert cache_file.exists()

    # W3: the file is an encrypted envelope, so check the prefix on the raw
    # text first and only then decrypt to look at the JSON shape.
    on_disk = cache_file.read_text(encoding="utf-8")
    assert on_disk.startswith("iai:enc:v1:"), (
        "Phase 07.9 W3: cache must be v3 ciphertext on disk"
    )

    decoded = _read_decrypted_cache(store, cache_file)
    assert decoded["cache_version"] == runtime_graph_cache.CACHE_VERSION
    for required_field in ("assignment", "rich_club", "key"):
        assert required_field in decoded
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 2
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_try_load_round_trip_on_unchanged_store(store):
    """Contract 2: ``try_load`` returns what ``save`` wrote when nothing changed."""
    expected_assignment = _make_assignment()
    expected_rich_club = [uuid4() for _ in range(3)]
    runtime_graph_cache.save(store, expected_assignment, expected_rich_club)

    result = runtime_graph_cache.try_load(store)
    assert result is not None

    # The result unpacks as (assignment, rich_club, node_payload, max_degree).
    # Only the first two are checked here: this test uses the legacy
    # two-argument save() shape, so the payload carries nothing of interest.
    got_assignment, got_rich_club, _payload, _degree = result

    assert got_assignment.backend == expected_assignment.backend
    assert got_assignment.modularity == pytest.approx(
        expected_assignment.modularity
    )
    assert set(got_assignment.top_communities) == set(
        expected_assignment.top_communities
    )
    assert set(got_rich_club) == set(expected_rich_club)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 3
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_key_mismatch_invalidates_cache(store):
    """Contract 3: a stored key that disagrees with the store is a cache miss."""
    runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    cache_file = runtime_graph_cache._cache_path(store)
    assert cache_file.exists()

    # Fake a store change by corrupting the first element of the saved key,
    # then write it back (W3: decrypt, mutate, re-encrypt).
    forged = _read_decrypted_cache(store, cache_file)
    forged["key"][0] = 999  # bogus records_count
    _write_encrypted_cache(store, cache_file, forged)

    assert runtime_graph_cache.try_load(store) is None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 4
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_cache_version_mismatch_triggers_rebuild(store):
    """Contract 4: an unrecognised ``cache_version`` makes ``try_load`` return None."""
    runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    cache_file = runtime_graph_cache._cache_path(store)

    # W3: decrypt, overwrite the version tag, re-encrypt.
    stale = _read_decrypted_cache(store, cache_file)
    stale["cache_version"] = "old-format-v0"
    _write_encrypted_cache(store, cache_file, stale)

    assert runtime_graph_cache.try_load(store) is None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 5
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_corrupt_json_returns_none(store):
    """Contract 5: an unparseable sidecar yields None instead of raising."""
    cache_file = runtime_graph_cache._cache_path(store)
    # The parent directory may not exist yet because nothing has been saved.
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    cache_file.write_text("{not valid json at all")

    assert runtime_graph_cache.try_load(store) is None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 6
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_absent_cache_returns_none(store):
    """Contract 6: with no sidecar on disk, ``try_load`` returns None."""
    cache_file = runtime_graph_cache._cache_path(store)
    # Precondition: a fresh store fixture has not written a cache yet.
    assert not cache_file.exists()

    assert runtime_graph_cache.try_load(store) is None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 7
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_build_runtime_graph_uses_cache_on_second_call(store):
    """Contract 7: the second ``build_runtime_graph`` call skips community detection."""
    patch_target = "iai_mcp.community.detect_communities"
    # Resolve the real function before any patch is active so the spy can
    # delegate to it.
    real_detect = __import__(
        "iai_mcp.community", fromlist=["detect_communities"]
    ).detect_communities

    # Cold build: detection runs once (through the spy) and the cache is written.
    with mock.patch(patch_target, wraps=real_detect) as cold_spy:
        retrieve.build_runtime_graph(store)
    assert cold_spy.call_count == 1

    # Warm build: the cache is valid, so detection must not run again.
    with mock.patch(patch_target) as warm_spy:
        retrieve.build_runtime_graph(store)
    assert warm_spy.call_count == 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 8
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_build_runtime_graph_invalidates_on_record_added(store, tmp_path):
    """Contract 8: inserting a record makes the next build re-run detection.

    The expectation is that the insert changes the store's record count,
    which changes the cache key and therefore invalidates the cache.
    """
    # Seed the cache from the still-empty store.
    retrieve.build_runtime_graph(store)
    assert runtime_graph_cache._cache_path(store).exists()

    from datetime import datetime, timezone

    from iai_mcp.types import MemoryRecord

    # Minimal record. It goes through the real insert path so that the
    # store's own record count changes.
    record_fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface="x",
        aaak_index="",
        embedding=[0.0] * store.embed_dim,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=["t"],
        language="en",
    )
    store.insert(MemoryRecord(**record_fields))

    # The spy delegates to the real function so the rebuild completes.
    real_detect = __import__(
        "iai_mcp.community", fromlist=["detect_communities"]
    ).detect_communities
    with mock.patch(
        "iai_mcp.community.detect_communities", wraps=real_detect
    ) as rebuild_spy:
        retrieve.build_runtime_graph(store)
    assert rebuild_spy.call_count == 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 9
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_is_atomic_leaves_old_file_on_error(store, monkeypatch):
    """Contract 9: a failed rename leaves the previous cache and no temp file.

    ``os.replace`` is forced to raise during the second ``save``. That call
    must return False, the earlier cache file must be byte-identical, and the
    ``.tmp`` sibling must be gone.
    """
    # Start from a known-good cache and remember exactly what is on disk.
    runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    cache_file = runtime_graph_cache._cache_path(store)
    before = cache_file.read_text()

    # Make the final rename step fail. monkeypatch undoes this at teardown.
    failing_replace = mock.Mock(side_effect=OSError("rename failed"))
    monkeypatch.setattr(
        "iai_mcp.runtime_graph_cache.os.replace",
        failing_replace,
    )

    second_save = runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    assert second_save is False

    # The previous cache is untouched.
    assert cache_file.read_text() == before

    # No stray temp file remains next to it.
    leftover = cache_file.with_suffix(cache_file.suffix + ".tmp")
    assert not leftover.exists()
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 10
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_invalidate_removes_cache_file(store):
    """Contract 10: ``invalidate`` deletes the sidecar and tolerates a missing file."""
    runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    cache_file = runtime_graph_cache._cache_path(store)
    assert cache_file.exists()

    runtime_graph_cache.invalidate(store)
    assert not cache_file.exists()

    # Calling it again with nothing to delete must not raise.
    runtime_graph_cache.invalidate(store)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- Test 11 (bonus)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_embed_dim_change_invalidates(store):
    """Swapping embedders (a different ``embed_dim``) must force a rebuild.

    Per the original author's note, the cache key includes
    ``store.embed_dim``, so a dimension change should invalidate the cache
    with no separate signal. The test saves a cache, confirms it loads, then
    changes the dimension and expects ``try_load`` to return None.
    """
    runtime_graph_cache.save(store, _make_assignment(), [uuid4()])
    assert runtime_graph_cache.try_load(store) is not None

    # Simulate an embedder swap by overwriting ``_embed_dim`` directly
    # (presumably the attribute behind ``store.embed_dim`` - verify against
    # MemoryStore). The new value is chosen to differ from the current one:
    # a hard-coded 1024 would be a silent no-op on a store that is already
    # 1024-d, so the test would not exercise invalidation at all.
    store._embed_dim = 384 if store.embed_dim == 1024 else 1024
    assert runtime_graph_cache.try_load(store) is None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- tests
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_drops_oversize_community_centroids(store):
    """An oversize ``community_centroids`` map must be pruned to fit the cap.

    Scenario (per the original note): an all-isolated graph where Leiden
    yields one community per node, so the centroids alone exceed the size
    cap (``MAX_CACHE_BYTES``; the original note cites 10 MiB).

    Earlier behaviour, as described by the author: a single-shot drop cleared
    ``node_payload``, re-serialised, found the payload still over the cap and
    wrote it anyway. Expected behaviour now: ``node_payload`` is dropped
    first, centroids next, while ``node_to_community``, ``modularity`` and
    ``top_communities`` survive.
    """
    # 2000 centroids x 1024 floats is about 2M numbers. At roughly 10 bytes
    # each in JSON that is on the order of 20 MB, comfortably past the cap.
    centroid_ids = [uuid4() for _ in range(2000)]
    big_centroids = {cid: [0.123456] * 1024 for cid in centroid_ids}
    big_node_to_community = {uuid4(): uuid4() for _ in range(50)}
    top_five = centroid_ids[:5]

    assignment = CommunityAssignment(
        node_to_community=big_node_to_community,
        community_centroids=big_centroids,
        modularity=0.37,
        backend="leiden-networkx",
        top_communities=top_five,
        mid_regions={community: [] for community in top_five},
    )
    rich_club = [uuid4() for _ in range(10)]

    # One payload entry, so the first drop step has something to remove.
    probe_entry = {
        "embedding": [0.0] * 384,
        "surface": "probe",
        "centrality": 0.1,
        "tier": "episodic",
        "pinned": False,
        "tags": [],
        "language": "en",
    }
    node_payload = {str(uuid4()): probe_entry}

    ok = runtime_graph_cache.save(
        store, assignment, rich_club, node_payload=node_payload
    )
    assert ok is True

    path = runtime_graph_cache._cache_path(store)
    assert path.exists()

    # The file on disk respects the cap.
    size = path.stat().st_size
    assert size <= runtime_graph_cache.MAX_CACHE_BYTES, (
        f"cache file {size} bytes exceeds cap "
        f"{runtime_graph_cache.MAX_CACHE_BYTES}"
    )

    data = _read_decrypted_cache(store, path)
    saved_assignment = data["assignment"]

    # Both droppable sections were emptied.
    assert data["node_payload"] == {}
    assert saved_assignment["community_centroids"] == {}

    # The authoritative fields are intact.
    assert saved_assignment["modularity"] == pytest.approx(0.37)
    assert saved_assignment["backend"] == "leiden-networkx"
    assert len(saved_assignment["node_to_community"]) == len(big_node_to_community)
    assert len(saved_assignment["top_communities"]) == 5
    # The rich-club list is intact.
    assert len(data["rich_club"]) == len(rich_club)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_small_payload_survives_unchanged(store):
    """Negative case: a payload well under the cap is written without pruning.

    Centroids, ``mid_regions`` and ``node_payload`` must all round-trip
    intact. This guards against a drop path that fires too eagerly.
    """
    assignment = _make_assignment(n_communities=2)
    rich_club = [uuid4() for _ in range(3)]

    def _entry():
        # A fresh dict per node, matching the per-entry literal it replaces.
        return {
            "embedding": [0.1] * 384,
            "surface": "hello",
            "centrality": 0.2,
            "tier": "episodic",
            "pinned": False,
            "tags": ["t"],
            "language": "en",
        }

    node_payload = {str(uuid4()): _entry() for _ in range(5)}

    ok = runtime_graph_cache.save(
        store, assignment, rich_club, node_payload=node_payload
    )
    assert ok is True

    cache_file = runtime_graph_cache._cache_path(store)
    decoded = _read_decrypted_cache(store, cache_file)

    # Comfortably below the cap.
    assert cache_file.stat().st_size < runtime_graph_cache.MAX_CACHE_BYTES

    # Every section is still populated with the expected number of entries.
    assert decoded["node_payload"] != {}
    assert len(decoded["node_payload"]) == 5
    saved_assignment = decoded["assignment"]
    assert saved_assignment["community_centroids"] != {}
    assert len(saved_assignment["community_centroids"]) == 2
    assert saved_assignment["mid_regions"] != {}
    assert len(saved_assignment["mid_regions"]) == 2
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------------- W3 / tests
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_writes_ciphertext_no_plaintext_surface(store):
    """W3: the saved sidecar must not expose the surface text in the clear.

    A recognisable canary string is stored as a node's ``surface``. The raw
    bytes on disk must not contain it and must start with the ciphertext
    envelope prefix.
    """
    canary = "PLAINTEXT_CANARY_4d7f_07_9_W3"
    record_id = uuid4()
    canary_entry = {
        "embedding": [0.1] * 384,
        "surface": canary,
        "centrality": 0.5,
        "tier": "episodic",
        "pinned": False,
        "tags": ["t"],
        "language": "en",
    }

    ok = runtime_graph_cache.save(
        store,
        _make_assignment(),
        [uuid4()],
        node_payload={str(record_id): canary_entry},
        max_degree=3,
    )
    assert ok is True

    raw_bytes = runtime_graph_cache._cache_path(store).read_bytes()

    # The canary must not be readable anywhere in the file.
    assert canary.encode("utf-8") not in raw_bytes, (
        "plaintext surface canary leaked into the on-disk sidecar"
    )
    # The file must carry the encrypted-envelope prefix.
    assert raw_bytes.startswith(b"iai:enc:v1:"), (
        f"expected v3 ciphertext envelope; got prefix {raw_bytes[:32]!r}"
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_save_then_try_load_preserves_surface_byte_for_byte(store):
    """W3: node payload fields survive the encrypt/decrypt round trip.

    The surface string deliberately mixes Latin, Cyrillic and CJK characters
    to show that non-ASCII text comes back unchanged.
    """
    rid = uuid4()
    surface = "user сказал важное — please remember this 重要"
    entry = {
        "embedding": [0.42] * 384,
        "surface": surface,
        "centrality": 0.42,
        "tier": "episodic",
        "pinned": True,
        "tags": ["t1", "t2"],
        "language": "ru",
    }
    runtime_graph_cache.save(
        store,
        _make_assignment(),
        [rid],
        node_payload={str(rid): entry},
        max_degree=7,
    )

    loaded = runtime_graph_cache.try_load(store)
    assert loaded is not None

    _assignment, _rich_club, payload, max_deg = loaded
    assert payload is not None

    restored = payload[str(rid)]
    assert restored["surface"] == surface
    assert restored["centrality"] == pytest.approx(0.42)
    assert restored["tags"] == ["t1", "t2"]
    assert restored["language"] == "ru"
    assert max_deg == 7
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_v2_plaintext_lazy_migrates_to_v3(store):
    """W3: a legacy plaintext sidecar is loaded once and rewritten as ciphertext.

    A hand-written plaintext file is placed at the cache path. The first
    ``try_load`` must return its contents, and afterwards the file on disk
    must be a ciphertext envelope with no trace of the plaintext surface.
    """
    path = runtime_graph_cache._cache_path(store)
    path.parent.mkdir(parents=True, exist_ok=True)

    legacy_version = runtime_graph_cache.LEGACY_CACHE_VERSION_PLAINTEXT
    rid = uuid4()

    # Start from the store's current cache key so the key check accepts the
    # file, then stamp the legacy version into it (cache_version is the 5th
    # element, when the key is that long).
    saved_key = list(runtime_graph_cache._cache_key(store))
    if len(saved_key) >= 5:
        saved_key[4] = legacy_version

    legacy_data = {
        "cache_version": legacy_version,
        "key": saved_key,
        "assignment": runtime_graph_cache._encode_assignment(_make_assignment()),
        "rich_club": [str(uuid4())],
        "node_payload": {
            str(rid): {
                "embedding": [0.0] * 384,
                "surface": "legacy_plain_canary",
                "centrality": 0.0,
                "tier": "episodic",
                "pinned": False,
                "tags": [],
                "language": "en",
            }
        },
        "max_degree": 1,
        "saved_at": "2026-04-29T00:00:00Z",
    }
    path.write_text(json.dumps(legacy_data), encoding="utf-8")

    # The first load reads the plaintext and is expected to re-save it in
    # the encrypted format as a side effect.
    loaded = runtime_graph_cache.try_load(store)
    assert loaded is not None
    _assignment, _rich_club, payload, _max_degree = loaded
    assert payload is not None
    assert payload[str(rid)]["surface"] == "legacy_plain_canary"

    # What is on disk now must be ciphertext only.
    raw_after = path.read_bytes()
    assert raw_after.startswith(b"iai:enc:v1:"), (
        "v2 plaintext file must be lazily migrated to v3 ciphertext"
    )
    assert b"legacy_plain_canary" not in raw_after, (
        "post-migration on-disk bytes must not contain the legacy plaintext"
    )
|