Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
431 lines
15 KiB
Python
431 lines
15 KiB
Python
"""Tests for iai_mcp.pipeline (D-13 5-stage retrieval pipeline).
|
||
|
||
Uses a FakeEmbedder fixture so tests don't pull BAAI/bge-small-en-v1.5 from
|
||
HuggingFace during every run. The Embedder contract is verified separately in
|
||
test_embed.py.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timezone
|
||
from uuid import uuid4
|
||
|
||
from iai_mcp.community import CommunityAssignment
|
||
from iai_mcp.graph import MemoryGraph
|
||
from iai_mcp.pipeline import (
|
||
W_AAAK,
|
||
W_AGE,
|
||
W_COSINE,
|
||
W_DEGREE,
|
||
_aaak_overlap,
|
||
_community_gate,
|
||
_cosine,
|
||
_pick_seeds,
|
||
recall_for_response,
|
||
)
|
||
from iai_mcp.store import MemoryStore
|
||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||
|
||
|
||
class _FakeEmbedder:
    """Deterministic embedder double used in place of the real model.

    Every input maps to the same unit vector along axis 0, so no model
    weights are ever loaded. recall_for_response uses embedder.embed() only
    for the cue, so this minimal surface is sufficient for these tests.

    DIM follows EMBED_DIM (the default registry is bge-m3 = 1024d). Plan-02
    tests that hand-build vectors must size them as
    `[1.0] + [0.0] * (DIM - 1)` via this constant so the store.insert()
    dim-check passes.
    """

    DIM = EMBED_DIM  # 1024 under default (bge-m3)

    def embed(self, text: str) -> list[float]:
        """Return the axis-0 unit vector regardless of *text*."""
        unit = [1.0]
        unit.extend([0.0] * (EMBED_DIM - 1))
        return unit

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text through embed(), preserving input order."""
        return list(map(self.embed, texts))
|
||
|
||
|
||
def _make(vec: list[float], text: str = "rec", aaak: str = "", detail: int = 2) -> MemoryRecord:
    """Build a fresh MemoryRecord fixture for the pipeline tests.

    Args:
        vec: Embedding stored on the record.
        text: Value used for ``literal_surface``.
        aaak: Value used for ``aaak_index``.
        detail: Value used for ``detail_level``.

    Returns:
        A new record with a random id, ``tier="episodic"`` (kept within
        TIER_ENUM), ``language="en"`` (required), and ``created_at`` /
        ``updated_at`` both set to the current UTC time. All remaining fields
        are neutral defaults (zeros, ``False``, ``None`` or empty lists).
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index=aaak,
        embedding=vec,
        community_id=None,
        centrality=0.0,
        detail_level=detail,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
    return MemoryRecord(**fields)
|
||
|
||
|
||
# ---------------------------------------------------------- stage-unit tests
|
||
|
||
|
||
def test_community_gate_picks_nearest() -> None:
    """CONN-06: top-1 gate on 3 centroids picks the one nearest the cue."""
    c0 = uuid4()
    c1 = uuid4()
    c2 = uuid4()
    centroids = {
        # Aligned with the cue.
        c0: [1.0] + [0.0] * (EMBED_DIM - 1),
        # Zero centroid. Sized by EMBED_DIM (previously a hard-coded 384) so
        # every centroid has the same dimensionality as the cue.
        c1: [0.0] * EMBED_DIM,
        # Opposite to the cue.
        c2: [-1.0] + [0.0] * (EMBED_DIM - 1),
    }
    a = CommunityAssignment(community_centroids=centroids)
    cue = [1.0] + [0.0] * (EMBED_DIM - 1)

    gated = _community_gate(cue, a, top_n=1)

    assert len(gated) == 1
    assert gated[0] == c0
|
||
|
||
|
||
def test_community_gate_returns_top_n_in_order() -> None:
    """With top_n=3 the gate returns all three communities, nearest first."""
    nearest, middle, farthest = uuid4(), uuid4(), uuid4()
    tail = EMBED_DIM - 1
    assignment = CommunityAssignment(
        community_centroids={
            nearest: [1.0] + [0.0] * tail,
            middle: [0.5, 0.5] + [0.0] * (EMBED_DIM - 2),
            farthest: [-1.0] + [0.0] * tail,
        }
    )
    cue = [1.0] + [0.0] * tail

    ordered = _community_gate(cue, assignment, top_n=3)

    assert ordered == [nearest, middle, farthest]
|
||
|
||
|
||
def test_aaak_overlap_basic_jaccard() -> None:
    """_aaak_overlap: empty side -> 0.0, identical -> 1.0, partial -> Jaccard."""
    exact_cases = (
        ("", "anything", 0.0),
        ("x", "", 0.0),
        ("a b", "a b", 1.0),  # identical
    )
    for left, right, expected in exact_cases:
        assert _aaak_overlap(left, right) == expected

    # Jaccard({a,b}, {b,c}) = 1 / 3
    partial = _aaak_overlap("a b", "b c")
    assert abs(partial - 1 / 3) < 1e-9
|
||
|
||
|
||
def test_aaak_overlap_slash_split_symmetric() -> None:
    """AAAK tokens use '/' as separator; both sides must split on it."""
    # Identical slash-delimited paths -> 1.0 (bug fix: cue side also splits).
    same_path = _aaak_overlap("auth/login", "auth/login")
    assert same_path == 1.0

    # Partial share: {auth, login} vs {auth, logout} -> Jaccard = 1/3.
    shared_prefix = _aaak_overlap("auth/login", "auth/logout")
    assert abs(shared_prefix - 1 / 3) < 1e-9

    # Case-insensitive.
    mixed_case = _aaak_overlap("AUTH/Login", "auth/login")
    assert mixed_case == 1.0
|
||
|
||
|
||
def test_cosine_basic_properties() -> None:
    """_cosine on 2-d vectors: parallel, anti-parallel, orthogonal, zero."""
    x_axis = [1.0, 0.0]
    cases = (
        (x_axis, [1.0, 0.0], 1.0),
        (x_axis, [-1.0, 0.0], -1.0),
        (x_axis, [0.0, 1.0], 0.0),
        ([0.0, 0.0], [1.0, 0.0], 0.0),  # zero vector guard
    )
    for lhs, rhs, expected in cases:
        assert _cosine(lhs, rhs) == expected
|
||
|
||
|
||
def test_score_weight_constants_match_d13() -> None:
    """D-13 score = cos + 0.3*aaak + 0.1*log(1+deg) − 0.05*age."""
    expected_weights = (
        (W_COSINE, 1.0),
        (W_AAAK, 0.3),
        (W_DEGREE, 0.1),
        (W_AGE, 0.05),
    )
    for actual, expected in expected_weights:
        assert actual == expected
|
||
|
||
|
||
# ------------------------------------------------------------- end-to-end
|
||
|
||
|
||
def test_pipeline_returns_hits_with_adjacent_suggestions(tmp_path) -> None:
    """End-to-end: the pipeline returns ranked hits, a non-empty
    activation_trace, and a list-typed adjacent_suggestions on every hit
    (AUTIST-07 contract)."""

    def vec(*head: float) -> list[float]:
        """Return a fresh EMBED_DIM-length vector: *head*, then zero padding."""
        return list(head) + [0.0] * (EMBED_DIM - len(head))

    store = MemoryStore(path=tmp_path)
    records = [
        _make(vec(1.0), text="primary match", aaak="test match"),
        _make(vec(0.9, 0.1), text="close match"),
        _make(vec(0.0, 1.0), text="orthogonal"),
        _make(vec(-1.0), text="opposite"),
        _make(vec(0.5, 0.5), text="mid"),
    ]
    for record in records:
        store.insert(record)

    graph = MemoryGraph()
    for record in records:
        graph.add_node(record.id, community_id=None, embedding=record.embedding)
    # Chain topology: every record is linked to its successor in the list.
    for left, right in zip(records, records[1:]):
        graph.add_edge(left.id, right.id)

    cid = uuid4()
    assignment = CommunityAssignment(
        node_to_community={record.id: cid for record in records},
        community_centroids={cid: vec(1.0)},
        modularity=0.0,
        backend="flat",
        top_communities=[cid],
        mid_regions={cid: [record.id for record in records]},
    )

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue="test match",
        session_id="s1",
    )

    hits = resp.hits
    assert len(hits) >= 1

    # Primary record has aaak overlap ("test match" in both cue and aaak_index),
    # cosine=1.0, and degree=1: score = 1.0 + 0.3*1.0 + 0.1*log(2) ≈ 1.369.
    # Close record has cos≈0.994, no aaak, degree=2: 0.994 + 0.1*log(3) ≈ 1.104.
    # Primary must win thanks to the AAAK overlap bonus.
    assert hits[0].literal_surface == "primary match"

    # Opposite record must NOT appear as a top hit (negative cosine).
    top_two_surfaces = [hit.literal_surface for hit in hits[:2]]
    assert "opposite" not in top_two_surfaces

    # adjacent_suggestions must be a list on every hit.
    assert all(isinstance(hit.adjacent_suggestions, list) for hit in hits)

    # activation_trace = seeds ∪ spread; must not be empty here.
    assert len(resp.activation_trace) >= 1
|
||
|
||
|
||
def test_pipeline_provenance_appended_to_every_hit(tmp_path) -> None:
    """MEM-05 regression: every hit returned gets a provenance entry."""
    session, cue_text = "session-42", "anything"
    zero_tail = [0.0] * (EMBED_DIM - 1)

    store = MemoryStore(path=tmp_path)
    record = _make([1.0] + zero_tail, text="primary")
    store.insert(record)

    graph = MemoryGraph()
    graph.add_node(record.id, community_id=None, embedding=record.embedding)

    cid = uuid4()
    assignment = CommunityAssignment(
        node_to_community={record.id: cid},
        community_centroids={cid: [1.0] + zero_tail},
        modularity=0.0,
        backend="flat",
        top_communities=[cid],
        mid_regions={cid: [record.id]},
    )

    recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue=cue_text,
        session_id=session,
    )

    # Re-read the record from the store to observe what the recall persisted.
    refreshed = store.get(record.id)
    assert refreshed is not None
    assert len(refreshed.provenance) == 1
    entry = refreshed.provenance[0]
    assert entry["session_id"] == session
    assert entry["cue"] == cue_text
|
||
|
||
|
||
def test_pipeline_budget_caps_hit_count(tmp_path) -> None:
    """Budget enforcement: when tokens exceeded, pipeline stops adding hits."""
    store = MemoryStore(path=tmp_path)

    # 5 records each with ~200 chars (~50 tokens). Budget=60 -> only first fits.
    filler = "x" * 200
    zero_tail = [0.0] * (EMBED_DIM - 2)
    records = []
    for idx in range(5):
        record = _make(
            [1.0, float(idx) * 0.001] + zero_tail,
            text=f"{filler}-{idx}",
        )
        records.append(record)
        store.insert(record)

    graph = MemoryGraph()
    for record in records:
        graph.add_node(record.id, community_id=None, embedding=record.embedding)

    cid = uuid4()
    assignment = CommunityAssignment(
        node_to_community={record.id: cid for record in records},
        community_centroids={cid: [1.0] + [0.0] * (EMBED_DIM - 1)},
        modularity=0.0,
        backend="flat",
        top_communities=[cid],
        mid_regions={cid: [record.id for record in records]},
    )

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue="c",
        session_id="s",
        budget_tokens=60,
    )

    # With 50-token records and 60-token budget, at most 1 hit fits then loop breaks.
    # (We always admit 1 even if it exceeds budget, per the len(hits)>=1 guard.)
    hit_count = len(resp.hits)
    assert hit_count == 1
|
||
|
||
|
||
def test_pipeline_anti_hits_from_contradicts_edge(tmp_path) -> None:
    """D-13 anti-hit contract: contradicts-edge neighbours of a top hit surface."""
    from iai_mcp.core import dispatch

    zero_tail = [0.0] * (EMBED_DIM - 1)

    store = MemoryStore(path=tmp_path)
    original = _make([1.0] + zero_tail, text="original")
    store.insert(original)

    # Register a contradiction against the stored record through the public
    # dispatch entry point.
    contradiction = {
        "id": str(original.id),
        "new_fact": "refuted version",
        "cue_embedding": original.embedding,
    }
    dispatch(store, "memory_contradict", contradiction)

    graph = MemoryGraph()
    graph.add_node(original.id, community_id=None, embedding=[1.0] + zero_tail)

    cid = uuid4()
    assignment = CommunityAssignment(
        node_to_community={original.id: cid},
        community_centroids={cid: [1.0] + zero_tail},
        modularity=0.0,
        backend="flat",
        top_communities=[cid],
        mid_regions={cid: [original.id]},
    )

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue="anything",
        session_id="s1",
    )

    anti_hits = resp.anti_hits
    assert len(anti_hits) >= 1
    assert "refuted" in anti_hits[0].literal_surface
|
||
|
||
|
||
def test_pipeline_activation_trace_includes_seeds(tmp_path) -> None:
    """activation_trace = seeds ∪ spread; must contain each seed."""

    def vec(*head: float) -> list[float]:
        """Return a fresh EMBED_DIM-length vector: *head*, then zero padding."""
        return list(head) + [0.0] * (EMBED_DIM - len(head))

    store = MemoryStore(path=tmp_path)
    seed = _make(vec(1.0), text="A")
    near = _make(vec(0.9, 0.1), text="B")
    far = _make(vec(0.0, 1.0), text="C")
    trio = (seed, near, far)
    for record in trio:
        store.insert(record)

    graph = MemoryGraph()
    for record in trio:
        graph.add_node(record.id, community_id=None, embedding=record.embedding)
    # Path graph A - B - C.
    graph.add_edge(seed.id, near.id)
    graph.add_edge(near.id, far.id)

    cid = uuid4()
    assignment = CommunityAssignment(
        node_to_community={record.id: cid for record in trio},
        community_centroids={cid: vec(1.0)},
        modularity=0.0,
        backend="flat",
        top_communities=[cid],
        mid_regions={cid: [record.id for record in trio]},
    )

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue="c",
        session_id="s",
    )

    # The top-cosine seed is A; its 2-hop neighbourhood is {B, C}. Trace must contain A.
    trace = resp.activation_trace
    assert seed.id in trace
|
||
|
||
|
||
def test_pick_seeds_ranks_by_blended_score(tmp_path) -> None:
    """Stage 3 blend: 0.6*cos + 0.4*centrality picks the high-blend record first.

    After the redesign, `_pick_seeds` operates over a precomputed shared
    cosine array; positions, not UUIDs, flow through. This reproduces the
    pre-redesign assertion: r2 (cos=0.707, cen=1.0, blend=0.82) beats
    r1 (cos=1.0, cen=0.0, blend=0.6) at n=1.
    """
    import numpy as np

    # Pool layout: position 0 = r1, position 1 = r2.
    # cue = axis 0 -> shared cosine array = [1.0, 0.707].
    cosines = np.array([1.0, 0.7071068], dtype=np.float32)
    centralities = np.array([0.0, 1.0], dtype=np.float32)
    positions = np.array([0, 1], dtype=np.int64)

    picked = _pick_seeds(positions, cosines, centralities, n=1)

    # r2 (position 1): blend = 0.6 * 0.707 + 0.4 * 1.0 = 0.824 > r1's 0.6.
    assert list(picked) == [1]
|
||
|
||
|
||
def test_pipeline_core_dispatch_integration(tmp_path, monkeypatch) -> None:
    """core.dispatch("memory_recall", ...) routes to pipeline for non-empty store."""
    from iai_mcp.core import dispatch

    store = MemoryStore(path=tmp_path)
    r = _make([1.0] + [0.0] * (EMBED_DIM - 1), text="integration")
    store.insert(r)

    # Stub out Embedder inside core to avoid HF download.
    class _StubEmbedder:
        # Advertise the same dimensionality that embed() actually returns
        # (previously a hard-coded 384, which disagreed with the
        # EMBED_DIM-length vectors below).
        DIM = EMBED_DIM

        def embed(self, text: str) -> list[float]:
            return [1.0] + [0.0] * (EMBED_DIM - 1)

    # core.py imports Embedder lazily inside dispatch -> patch at module level.
    import iai_mcp.embed as embed_mod

    monkeypatch.setattr(embed_mod, "Embedder", _StubEmbedder)

    resp = dispatch(
        store,
        "memory_recall",
        {"cue": "integration", "session_id": "s-int"},
    )

    assert "hits" in resp
    assert isinstance(resp["hits"], list)
    # activation_trace field always present, list of string UUIDs.
    assert isinstance(resp["activation_trace"], list)
    assert "budget_used" in resp
|
||
|
||
|
||
def test_pipeline_empty_gate_falls_back_to_all_nodes(tmp_path) -> None:
    """If community gate returns no candidates, pipeline falls back to all nodes."""
    store = MemoryStore(path=tmp_path)
    lonely = _make([1.0] + [0.0] * (EMBED_DIM - 1), text="lonely")
    store.insert(lonely)

    graph = MemoryGraph()
    graph.add_node(lonely.id, community_id=None, embedding=lonely.embedding)

    # Assignment whose mid_regions is empty (degenerate) -> pipeline must fall back.
    degenerate = CommunityAssignment(
        node_to_community={},
        community_centroids={},
        modularity=0.0,
        backend="flat",
        top_communities=[],
        mid_regions={},
    )

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=degenerate,
        rich_club=[],
        embedder=_FakeEmbedder(),
        cue="c",
        session_id="s",
    )

    # The lone record is still reachable via the fallback.
    hit_count = len(resp.hits)
    assert hit_count == 1
|