Initial release: iai-mcp v0.1.0

Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
2026-05-06 01:04:47 -07:00 · 2026-05-06 01:04:47 -07:00 · f6b876fbe7
commit f6b876fbe7
332 changed files with 97258 additions and 0 deletions
--- a/tests/test_recall_shared_cosine_pass_count.py
+++ b/tests/test_recall_shared_cosine_pass_count.py
@@ -0,0 +1,299 @@
+"""Phase 8 redesign (08-CONTEXT.md D-01): regression-fence — exactly one
+cue-vs-pool cosine pass per recall.
+
+The redesign's load-bearing claim is that the rank-stage cosine term
+reads from a shared array built ONCE at the top of `_recall_core`.
+This file fences the claim at the entry-point level: for both public
+entry points (`recall_for_response`, `recall_for_benchmark`) the
+matmul that computes `pool_embs @ cue_vec` fires exactly ONCE per
+call. The L0 fast-path bypasses the pool entirely (zero pool matmuls).
+
+Pre-08 the rank-stage was a separate `E @ cue_vec` matmul (Plan 05-13
+optimization) plus the patch helper `_augment_candidates_by_cosine`
+added a third independent cosine pass. The redesign collapses all
+three into one shared pass — the matmul-counter assertions in this
+file fence that contract for the public entry points (the
+`_recall_core`-level fence lives in `test_recall_core_unit.py`).
+
+Implementation note (D-PLAN-CHECK F4): the matmul-counter is the
+canonical approach with no sentinel-content fallback. The wrapper
+counts only "cue-vs-large-pool" matmul calls — 2D matrix shaped
+(N >= 50, D) against 1D cue vector shaped (D,). The community-gate
+centroid matmul (which has K = #communities < 50 in our fixtures)
+is excluded from the count by the >= 50 row floor.
+"""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from uuid import UUID, uuid4
+
+import numpy as np
+import pytest
+
+from iai_mcp.community import CommunityAssignment
+from iai_mcp.graph import MemoryGraph
+from iai_mcp.store import MemoryStore
+from iai_mcp.types import EMBED_DIM, MemoryRecord
+
+
+# --------------------------------------------------------------- test fixtures
+
+
+class _FakeEmbedder:
+    """Deterministic embedder double: every input maps to one fixed vector.
+
+    The vector is chosen at construction time, so a test controls the
+    cue embedding regardless of the text passed in. When no vector is
+    supplied, the unit vector along axis 0 is used.
+    """
+
+    DIM = EMBED_DIM
+
+    def __init__(self, vec: list[float] | None = None) -> None:
+        if vec is None:
+            vec = [1.0] + [0.0] * (EMBED_DIM - 1)
+        self._vec = vec
+
+    def embed(self, text: str) -> list[float]:
+        """Return a fresh copy of the configured vector; `text` is ignored."""
+        return [*self._vec]
+
+    def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Return one independent copy of the configured vector per text."""
+        return [self.embed(text) for text in texts]
+
+
+def _make(vec: list[float], text: str = "rec", tier: str = "episodic") -> MemoryRecord:
+    """Create a MemoryRecord with a random id and neutral default fields.
+
+    Only the embedding, the literal surface text and the tier vary per
+    call. `created_at` and `updated_at` share a single UTC timestamp
+    taken when the function runs.
+    """
+    stamp = datetime.now(timezone.utc)
+    fields = dict(
+        id=uuid4(),
+        tier=tier,
+        literal_surface=text,
+        aaak_index="",
+        embedding=vec,
+        community_id=None,
+        centrality=0.0,
+        detail_level=2,
+        pinned=False,
+        stability=0.0,
+        difficulty=0.0,
+        last_reviewed=None,
+        never_decay=False,
+        never_merge=False,
+        provenance=[],
+        created_at=stamp,
+        updated_at=stamp,
+        tags=[],
+        language="en",
+    )
+    return MemoryRecord(**fields)
+
+
+def _build_store_and_graph(tmp_path, n: int) -> tuple[MemoryStore, MemoryGraph, list[MemoryRecord]]:
+    """Build N one-hot records in a store plus a graph mirroring them.
+
+    Record ``i`` gets a one-hot embedding on axis ``i % EMBED_DIM`` and
+    the surface text ``rec{i}``; embeddings are therefore distinct only
+    while ``n <= EMBED_DIM``. Each record is inserted into a MemoryStore
+    rooted at ``tmp_path / "lancedb"`` and added as a graph node whose
+    NetworkX attributes carry the same payload.
+
+    Args:
+        tmp_path: Directory under which the store is created.
+        n: Number of records to build.
+
+    Returns:
+        The store, the graph, and the records in insertion order.
+    """
+    store = MemoryStore(path=tmp_path / "lancedb")
+    recs: list[MemoryRecord] = []
+    for i in range(n):
+        vec = [0.0] * EMBED_DIM
+        vec[i % EMBED_DIM] = 1.0
+        rec = _make(vec, text=f"rec{i}")
+        store.insert(rec)
+        recs.append(rec)
+    graph = MemoryGraph()
+    # enumerate yields each record's position directly. Looking it up
+    # with recs.index(rec) would be a linear scan per record (quadratic
+    # overall) and would depend on MemoryRecord equality semantics.
+    for i, rec in enumerate(recs):
+        graph.add_node(
+            rec.id, community_id=None, embedding=list(rec.embedding),
+        )
+        # Mirror build_runtime_graph: pour the payload onto the NetworkX
+        # node attrs so _collect_graph_pool's fast path hits.
+        graph._nx.nodes[str(rec.id)].update({
+            "embedding": list(rec.embedding),
+            "surface": f"rec{i}",
+            "centrality": 0.0,
+            "tier": rec.tier,
+            "tags": [],
+            "language": "en",
+        })
+    return store, graph, recs
+
+
+def _flat_assignment(recs: list[MemoryRecord]) -> CommunityAssignment:
+    """Place every record in one freshly-minted community.
+
+    The single community's centroid is the unit vector along axis 0,
+    and it is listed as the only top community (healthy graph baseline).
+    """
+    cid = uuid4()
+    axis0_centroid = [1.0] + [0.0] * (EMBED_DIM - 1)
+    member_ids = [r.id for r in recs]
+    return CommunityAssignment(
+        node_to_community=dict.fromkeys(member_ids, cid),
+        community_centroids={cid: axis0_centroid},
+        modularity=0.0,
+        backend="flat",
+        top_communities=[cid],
+        mid_regions={cid: member_ids},
+    )
+
+
+# ----------------------------------------------------- matmul counter helper
+
+
+def _matmul_with_counter(counter: dict[str, int], min_rows: int = 50):
+    """Build an ``np.matmul`` stand-in that counts cue-vs-large-pool calls.
+
+    A call is counted when the first operand is a 2D array shaped
+    (N >= min_rows, D) and the second is a 1D vector shaped (D,).
+    Smaller matmuls — such as the community-gate centroid matmul, which
+    has K = #communities < 50 rows in these fixtures — fall under the
+    row floor and are not counted. Every call, counted or not, is
+    forwarded unchanged to the real ``np.matmul`` captured when this
+    factory runs.
+
+    Per 08-PLAN-CHECK.md F4 this is the canonical approach; there is no
+    fallback to a sentinel-based content test.
+
+    Args:
+        counter: Mutable dict; its ``"count"`` entry is incremented
+            (starting from 0 if absent) for each counted call.
+        min_rows: Minimum row count for the 2D operand to be treated as
+            the large pool. Defaults to 50.
+
+    Returns:
+        A callable with ``np.matmul``'s calling convention, including a
+        positional ``out`` argument.
+
+    NOTE(review): only calls that look up ``np.matmul`` at call time
+    are observed once this is monkeypatched in; confirm the pipeline
+    does not reach the pool pass through the ``@`` operator.
+    """
+    orig = np.matmul
+
+    def _is_pool_pass(a, b) -> bool:
+        # Unpacking doubles as the rank check: anything that is not
+        # exactly (2D, 1D) — or has no usable ``shape`` — is rejected
+        # here without hiding unrelated errors.
+        try:
+            rows, cols = a.shape
+            (length,) = b.shape
+        except (AttributeError, TypeError, ValueError):
+            return False
+        return cols == length and rows >= min_rows
+
+    def wrapped(a, b, *args, **kw):
+        # Counting happens outside any blanket try/except so a broken
+        # counter fails loudly instead of leaving a misleading zero.
+        if _is_pool_pass(a, b):
+            counter["count"] = counter.get("count", 0) + 1
+        return orig(a, b, *args, **kw)
+
+    return wrapped
+
+
+# ----------------------------------------------------------------- tests
+
+
+def test_recall_for_benchmark_runs_one_pool_cosine(tmp_path, monkeypatch):
+    """Benchmark entry point: the cue-vs-pool matmul fires exactly once.
+
+    The fixture holds 60 records so the pool matmul clears the counter's
+    >= 50 row floor, while the single-community centroid matmul stays
+    below it. Per the Phase 8 design (D-01) the only large matmul is the
+    shared cosine pass in _recall_core; later stages are expected to
+    index into that shared array rather than run another pool matmul.
+    """
+    from iai_mcp.pipeline import recall_for_benchmark
+
+    store, graph, recs = _build_store_and_graph(tmp_path, n=60)
+    call_kwargs = dict(
+        store=store, graph=graph,
+        assignment=_flat_assignment(recs),
+        rich_club=[], embedder=_FakeEmbedder(),
+        cue="primary", session_id="s-bench-cosine-1",
+        k_hits=10, mode="concept",
+    )
+
+    # Install the counting wrapper only after the fixtures exist, so
+    # nothing done during setup can be counted.
+    counter: dict[str, int] = {"count": 0}
+    monkeypatch.setattr(np, "matmul", _matmul_with_counter(counter))
+
+    recall_for_benchmark(**call_kwargs)
+
+    assert counter["count"] == 1, (
+        f"D-01 violation: cue-vs-large-pool matmul fired "
+        f"{counter['count']} times via recall_for_benchmark; expected "
+        "exactly 1 (the shared cosine pass at the top of _recall_core)."
+    )
+
+
+def test_recall_for_response_runs_one_pool_cosine(tmp_path, monkeypatch):
+    """Production entry point: the cue-vs-pool matmul fires exactly once.
+
+    Counterpart of the benchmark test for `recall_for_response`. The
+    token budget is deliberately generous (4000) so the outcome does not
+    hinge on budget arithmetic in the packing loop.
+    """
+    from iai_mcp.pipeline import recall_for_response
+
+    store, graph, recs = _build_store_and_graph(tmp_path, n=60)
+    call_kwargs = dict(
+        store=store, graph=graph,
+        assignment=_flat_assignment(recs),
+        rich_club=[], embedder=_FakeEmbedder(),
+        cue="primary", session_id="s-resp-cosine-2",
+        budget_tokens=4000, mode="concept",
+    )
+
+    # Install the counting wrapper only after the fixtures exist, so
+    # nothing done during setup can be counted.
+    counter: dict[str, int] = {"count": 0}
+    monkeypatch.setattr(np, "matmul", _matmul_with_counter(counter))
+
+    recall_for_response(**call_kwargs)
+
+    assert counter["count"] == 1, (
+        f"D-01 violation: cue-vs-large-pool matmul fired "
+        f"{counter['count']} times via recall_for_response; expected "
+        "exactly 1 (the shared cosine pass at the top of _recall_core)."
+    )
+
+
+def test_l0_fastpath_runs_zero_pool_cosines(tmp_path, monkeypatch):
+    """L0 fast-path: a forced skip-retrieval gate means zero pool matmuls.
+
+    With `should_skip_retrieval` forced to fire, the entry point is
+    expected to return only the L0 sentinel hit and never run the
+    shared-cosine matmul, so the counter must stay at 0.
+
+    This fences the "L0 path is genuinely a fast-path" contract: if a
+    future change moved the pool walk ahead of the L0 gate, the count
+    would be non-zero even though retrieval was skipped.
+
+    NOTE(review): patching `gate_mod.should_skip_retrieval` only takes
+    effect if the pipeline resolves the function through the module at
+    call time — confirm against the pipeline's import style.
+    """
+    import iai_mcp.gate as gate_mod
+    from iai_mcp.pipeline import recall_for_benchmark
+
+    # Force should_skip_retrieval to fire, simulating an L0 hit.
+    monkeypatch.setattr(
+        gate_mod,
+        "should_skip_retrieval",
+        lambda cue: (True, "test L0 reason"),
+    )
+
+    # 60-record pool: above the counter's 50-row floor, so a pool walk
+    # that leaked past the gate would be counted.
+    store, graph, recs = _build_store_and_graph(tmp_path, n=60)
+
+    # Deterministic L0 sentinel record, inserted into the store only.
+    l0_uuid = UUID("00000000-0000-0000-0000-000000000001")
+    stamp = datetime.now(timezone.utc)
+    l0_fields = dict(
+        id=l0_uuid,
+        tier="episodic",
+        literal_surface="L0 identity literal",
+        aaak_index="",
+        embedding=[1.0] + [0.0] * (EMBED_DIM - 1),
+        community_id=None,
+        centrality=0.0,
+        detail_level=2,
+        pinned=False,
+        stability=0.0,
+        difficulty=0.0,
+        last_reviewed=None,
+        never_decay=False,
+        never_merge=False,
+        provenance=[],
+        created_at=stamp,
+        updated_at=stamp,
+        tags=[],
+        language="en",
+    )
+    store.insert(MemoryRecord(**l0_fields))
+
+    call_kwargs = dict(
+        store=store, graph=graph,
+        assignment=_flat_assignment(recs),
+        rich_club=[], embedder=_FakeEmbedder(),
+        cue="hi", session_id="s-l0-fast-3",
+        k_hits=10, mode="concept",
+    )
+
+    counter: dict[str, int] = {"count": 0}
+    monkeypatch.setattr(np, "matmul", _matmul_with_counter(counter))
+
+    resp = recall_for_benchmark(**call_kwargs)
+
+    # The L0 fast-path returns exactly 1 hit (the L0 sentinel).
+    assert len(resp.hits) == 1, (
+        f"L0 fast-path should return exactly 1 hit; got {len(resp.hits)}"
+    )
+    assert resp.hits[0].record_id == l0_uuid, (
+        "L0 fast-path returned a non-L0 record; gate fired but pool walk "
+        "happened anyway."
+    )
+    assert counter["count"] == 0, (
+        f"L0 fast-path violation: cue-vs-large-pool matmul fired "
+        f"{counter['count']} times even though the L0 gate fired; "
+        "expected 0 (the L0 path bypasses the pool walk entirely)."
+    )