Initial release: iai-mcp v0.1.0

Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
2026-05-06 01:04:47 -07:00 · 2026-05-06 01:04:47 -07:00 · f6b876fbe7
commit f6b876fbe7
332 changed files with 97258 additions and 0 deletions
--- a/tests/test_pipeline_perf.py
+++ b/tests/test_pipeline_perf.py
@ -0,0 +1,290 @@
+"""Pipeline perf regression guard (Plan 02-07 Task 2, D-SPEED gap closure).
+
+Load-bearing tests:
+  - test_recall_for_response_p95_under_threshold: seeds N=100 records, runs
+    recall_for_response 10 times; asserts p95 < 150ms (CI-generous ceiling).
+    Bench CLI uses the strict 100ms ceiling (D-SPEED SC-6).
+  - test_recall_for_response_single_provenance_batch_call: instrumentation test;
+    confirms append_provenance_batch is called exactly once per recall and
+    the pairs list matches the hit count (no per-hit append_provenance).
+  - test_recall_for_response_mem05_provenance_preserved: semantic
+    equivalence check -- every hit still has exactly one new provenance
+    entry whose session_id matches the call's session_id.
+  - test_recall_for_response_on_read_check_uses_batch_variant: monkeypatches
+    s4.on_read_check to raise + on_read_check_batch to return []; the call
+    must NOT raise, proving the batch variant is on the active call path.
+"""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from uuid import uuid4
+
+import pytest
+
+from iai_mcp.types import EMBED_DIM, MemoryRecord
+
+
+class _PerfEmbedder:
+    """Deterministic sha256-based embedder. Stable across processes."""
+
+    DIM = EMBED_DIM
+
+    def __init__(self, base_seed: int = 0) -> None:
+        self._base_seed = base_seed
+
+    def embed(self, text: str) -> list[float]:
+        import hashlib
+        import random
+        digest = hashlib.sha256(
+            f"{self._base_seed}:{text}".encode("utf-8")
+        ).hexdigest()
+        rng = random.Random(int(digest[:16], 16))
+        v = [rng.random() * 2 - 1 for _ in range(self.DIM)]
+        norm = sum(x * x for x in v) ** 0.5
+        return [x / norm for x in v] if norm > 0 else v
+
+    def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        return [self.embed(t) for t in texts]
+
+
+def _make_rec(vec: list[float], text: str, tags: list[str]) -> MemoryRecord:
+    now = datetime.now(timezone.utc)
+    return MemoryRecord(
+        id=uuid4(),
+        tier="episodic",
+        literal_surface=text,
+        aaak_index="",
+        embedding=vec,
+        community_id=None,
+        centrality=0.0,
+        detail_level=2,
+        pinned=False,
+        stability=0.0,
+        difficulty=0.0,
+        last_reviewed=None,
+        never_decay=False,
+        never_merge=False,
+        provenance=[],
+        created_at=now,
+        updated_at=now,
+        tags=tags,
+        language="en",
+    )
+
+
+def _seed_store(path, n: int = 100, seed: int = 0):
+    """Seed a MemoryStore with N synthetic records + build runtime graph."""
+    from iai_mcp.pipeline import recall_for_response  # noqa: F401
+    from iai_mcp.retrieve import build_runtime_graph
+    from iai_mcp.store import MemoryStore
+
+    store = MemoryStore(path=path)
+    embedder = _PerfEmbedder(base_seed=seed)
+    tag_pool = [
+        ["topic:auth"], ["topic:db"], ["topic:web"],
+        ["topic:net"], ["topic:cli"],
+    ]
+    for i in range(n):
+        vec = embedder.embed(f"seed-{i}")
+        tags = list(tag_pool[i % len(tag_pool)])
+        rec = _make_rec(vec, text=f"synthetic fact {i}", tags=tags)
+        store.insert(rec)
+    graph, assignment, rich_club = build_runtime_graph(store)
+    return store, embedder, graph, assignment, rich_club
+
+
+# --------------------------------------------------------- perf regression
+
+
+def test_recall_for_response_p95_under_threshold(tmp_path):
+    """D-SPEED perf regression guard: p95 < 150ms at N=100 (CI-generous).
+
+    PRE-FIX: p95 ~1000ms (fails). POST-FIX: p95 < 150ms (passes).
+    Bench CLI uses strict 100ms; this test uses 150ms to absorb CI jitter.
+    """
+    import time
+
+    from iai_mcp.pipeline import recall_for_response
+
+    store, embedder, graph, assignment, rich_club = _seed_store(
+        tmp_path, n=100, seed=0,
+    )
+    cues = [
+        "what did we cover about auth yesterday?",
+        "explain the db migration plan",
+        "how does the web cache invalidation work",
+        "summary of the cli subcommand changes",
+        "recent network stack bug report",
+    ]
+
+    latencies: list[float] = []
+    for i in range(10):
+        cue = cues[i % len(cues)]
+        t0 = time.perf_counter()
+        recall_for_response(
+            store=store,
+            graph=graph,
+            assignment=assignment,
+            rich_club=rich_club,
+            embedder=embedder,
+            cue=cue,
+            session_id="perf_test",
+            budget_tokens=1500,
+        )
+        latencies.append((time.perf_counter() - t0) * 1000.0)
+
+    latencies.sort()
+    p95 = latencies[int(len(latencies) * 0.95)] if len(latencies) > 1 else latencies[-1]
+    assert p95 < 150.0, (
+        f"D-SPEED regression: p95={p95:.2f}ms > 150ms at N=100 "
+        f"(target <100ms strict, 150ms CI-generous). "
+        f"All latencies: {[f'{x:.1f}' for x in latencies]}"
+    )
+
+
+# --------------------------------------------------------- wire-up tests
+
+
+def test_recall_for_response_single_provenance_batch_call(tmp_path, monkeypatch):
+    """recall_for_response calls store.append_provenance_batch EXACTLY once.
+
+    Instrumentation: replace append_provenance_batch with a recorder.
+    The recorder captures the pairs list length; after one recall_for_response
+    call with hits>=1, count must be exactly 1 and the pairs list length
+    must equal the number of hits.
+
+    ALSO asserts store.append_provenance (single-call) is NEVER called on
+    the hit path -- is preserved but through the batch API.
+    """
+    from iai_mcp.pipeline import recall_for_response
+    from iai_mcp.store import MemoryStore
+
+    store, embedder, graph, assignment, rich_club = _seed_store(
+        tmp_path, n=100, seed=0,
+    )
+
+    batch_calls: list[int] = []  # each element = len(pairs) of that call
+    single_calls: list[object] = []
+
+    original_batch = MemoryStore.append_provenance_batch
+    original_single = MemoryStore.append_provenance
+
+    def _recorder_batch(self, pairs, *args, **kwargs):
+        # pipeline passes records_cache=... kwarg -- accept and
+        # forward. The test only cares about call-count + pairs-length.
+        batch_calls.append(len(pairs))
+        return original_batch(self, pairs, *args, **kwargs)
+
+    def _recorder_single(self, record_id, entry):
+        single_calls.append((record_id, entry))
+        return original_single(self, record_id, entry)
+
+    monkeypatch.setattr(MemoryStore, "append_provenance_batch", _recorder_batch)
+    monkeypatch.setattr(MemoryStore, "append_provenance", _recorder_single)
+
+    resp = recall_for_response(
+        store=store,
+        graph=graph,
+        assignment=assignment,
+        rich_club=rich_club,
+        embedder=embedder,
+        cue="what about auth",
+        session_id="wire_test",
+        budget_tokens=1500,
+    )
+
+    assert len(resp.hits) >= 1, "pipeline must return at least one hit on seeded store"
+    assert len(batch_calls) == 1, (
+        f"append_provenance_batch should be called EXACTLY once; got {len(batch_calls)}"
+    )
+    assert batch_calls[0] == len(resp.hits), (
+        f"batch pairs list should have {len(resp.hits)} entries (one per hit); "
+        f"got {batch_calls[0]}"
+    )
+    # No per-hit single calls on the hit path.
+    assert len(single_calls) == 0, (
+        f"append_provenance (single) should NOT be called on the hit path; "
+        f"got {len(single_calls)} calls"
+    )
+
+
+def test_recall_for_response_mem05_provenance_preserved(tmp_path):
+    """MEM-05 correctness: every hit has a NEW provenance entry post-recall.
+
+    Establishes provenance len-before per hit, runs recall_for_response, then
+    confirms each hit's record has exactly one more provenance entry whose
+    session_id matches the call.
+    """
+    from iai_mcp.pipeline import recall_for_response
+
+    store, embedder, graph, assignment, rich_club = _seed_store(
+        tmp_path, n=100, seed=0,
+    )
+    session = "mem05_preserved"
+
+    # Run recall first to see which records become hits.
+    resp = recall_for_response(
+        store=store,
+        graph=graph,
+        assignment=assignment,
+        rich_club=rich_club,
+        embedder=embedder,
+        cue="what about auth",
+        session_id=session,
+        budget_tokens=1500,
+    )
+    assert len(resp.hits) >= 1
+
+    for h in resp.hits:
+        rec = store.get(h.record_id)
+        assert rec is not None
+        # Every hit has AT LEAST one provenance entry with the session_id
+        # we just used. (provisional check for correctness).
+        matching = [p for p in rec.provenance if p.get("session_id") == session]
+        assert len(matching) == 1, (
+            f"record {h.record_id} has {len(matching)} provenance entries "
+            f"for session '{session}'; expected exactly 1. prov={rec.provenance}"
+        )
+
+
+def test_recall_for_response_on_read_check_uses_batch_variant(tmp_path, monkeypatch):
+    """The active call path uses on_read_check_batch, not on_read_check.
+
+    Arrange: monkeypatch s4.on_read_check to RAISE. If the old single-call
+    path is still wired, recall_for_response will fail (or silently swallow).
+    We also patch on_read_check_batch to return a sentinel, and verify it
+    is what actually flows through.
+    """
+    from iai_mcp import s4
+    from iai_mcp.pipeline import recall_for_response
+
+    store, embedder, graph, assignment, rich_club = _seed_store(
+        tmp_path, n=100, seed=0,
+    )
+
+    sentinel = [{"kind": "sentinel_batch_wired", "severity": "info", "source_ids": [], "text": "ok"}]
+
+    def _boom(*a, **kw):
+        raise RuntimeError("old on_read_check must NOT be called")
+
+    def _batch_ok(*a, **kw):
+        return sentinel
+
+    monkeypatch.setattr(s4, "on_read_check", _boom)
+    monkeypatch.setattr(s4, "on_read_check_batch", _batch_ok)
+
+    resp = recall_for_response(
+        store=store,
+        graph=graph,
+        assignment=assignment,
+        rich_club=rich_club,
+        embedder=embedder,
+        cue="what about auth",
+        session_id="batch_wired_test",
+        budget_tokens=1500,
+    )
+    # The batch variant's sentinel must appear in hints.
+    hint_kinds = [h.get("kind") for h in resp.hints]
+    assert "sentinel_batch_wired" in hint_kinds, (
+        f"expected on_read_check_batch sentinel in hints; got {hint_kinds}"
+    )