561 lines
21 KiB
Python
561 lines
21 KiB
Python
|
|
"""Plan 06-04 R5: verbatim mode end-to-end tests.
|
||
|
|
|
||
|
|
R5 acceptance per SPEC.md:
|
||
|
|
- Test seeds 5 verbatim episodic records (one matching the cue) + 10 schema hubs.
|
||
|
|
- Verbatim cue: hits[0..2] contains the matching verbatim record.
|
||
|
|
- All hits[] are tier='episodic'. No schemas.
|
||
|
|
- hints[] empty.
|
||
|
|
- patterns_observed[] empty.
|
||
|
|
- cue_mode == 'verbatim'.
|
||
|
|
- Variance window: across 5 distinct verbatim cues + matching content,
|
||
|
|
matching record at position 0..2 in 100% of runs.
|
||
|
|
|
||
|
|
Plus Task 2 contract tests (mode kwarg, RecallResponse defaults).
|
||
|
|
|
||
|
|
Constitutional framing — Mottron EPF + Bowler TSH + Murray monotropism:
|
||
|
|
when the cue signals exact recall, return ONE hit (the bullseye), not 30.
|
||
|
|
Verbatim mode = position-1 strict.
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import math
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from uuid import uuid4
|
||
|
|
|
||
|
|
import numpy as np
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------- Fixture machinery
# Reuses the _ControlledEmbedder + _unit_vector_with_cosine pattern so that
# the rank stage's hand-crafted cosine geometry is deterministic.
|
||
|
|
|
||
|
|
|
||
|
|
class _ControlledEmbedder:
    """Deterministic test embedder with optional per-text pinned vectors.

    Texts registered through ``set_fixed`` return a copy of the pinned
    vector. Every other text gets a unit vector derived from the SHA-256
    digest of the text, so repeated calls with the same text always yield
    the same embedding.
    """

    DIM = EMBED_DIM

    def __init__(self) -> None:
        # text -> pinned vector; consulted before the hash-derived fallback.
        self.fixed: dict[str, list[float]] = {}

    def set_fixed(self, text: str, vec: list[float]) -> None:
        """Pin ``text`` to a copy of ``vec``."""
        self.fixed[text] = list(vec)

    def _hashed_unit_vector(self, text: str) -> list[float]:
        """Return the digest-seeded pseudo-random unit vector for ``text``."""
        import hashlib
        import random

        hex_digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        # The first 16 hex digits (64 bits) of the digest seed the generator.
        generator = random.Random(int(hex_digest[:16], 16))
        raw = []
        for _ in range(self.DIM):
            raw.append(generator.random() * 2 - 1)
        length = sum(component * component for component in raw) ** 0.5
        if length > 0:
            return [component / length for component in raw]
        return raw

    def embed(self, text: str) -> list[float]:
        """Return the pinned vector for ``text`` if any, else the hashed one."""
        try:
            pinned = self.fixed[text]
        except KeyError:
            return self._hashed_unit_vector(text)
        return list(pinned)

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text in order via ``embed``."""
        return list(map(self.embed, texts))
|
||
|
|
|
||
|
|
|
||
|
|
def _unit_vector_with_cosine(cue_vec: list[float], target_cos: float) -> list[float]:
|
||
|
|
cue = np.asarray(cue_vec, dtype=np.float32)
|
||
|
|
cue_norm = float(np.linalg.norm(cue))
|
||
|
|
if cue_norm == 0.0:
|
||
|
|
raise ValueError("cue_vec must be non-zero")
|
||
|
|
cue = cue / cue_norm
|
||
|
|
|
||
|
|
probe = np.zeros(EMBED_DIM, dtype=np.float32)
|
||
|
|
probe[1] = 1.0
|
||
|
|
if abs(float(np.dot(cue, probe))) > 0.999:
|
||
|
|
probe = np.zeros(EMBED_DIM, dtype=np.float32)
|
||
|
|
probe[0] = 1.0
|
||
|
|
orth = probe - float(np.dot(cue, probe)) * cue
|
||
|
|
orth = orth / float(np.linalg.norm(orth))
|
||
|
|
|
||
|
|
alpha = float(target_cos)
|
||
|
|
beta = float(math.sqrt(max(0.0, 1.0 - alpha * alpha)))
|
||
|
|
v = alpha * cue + beta * orth
|
||
|
|
n = float(np.linalg.norm(v))
|
||
|
|
if n > 0:
|
||
|
|
v = v / n
|
||
|
|
return v.astype(np.float32).tolist()
|
||
|
|
|
||
|
|
|
||
|
|
def _make_episodic(vec: list[float], text: str) -> MemoryRecord:
    """Build an episodic-tier ``MemoryRecord`` with a fresh id.

    ``vec`` is copied into ``embedding`` and ``text`` becomes
    ``literal_surface``. Both timestamps are the same UTC "now"; every other
    field is a fixed test default.
    """
    stamp = datetime.now(timezone.utc)
    fields = {
        "id": uuid4(),
        "tier": "episodic",
        "literal_surface": text,
        "aaak_index": "",
        "embedding": list(vec),
        "community_id": None,
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance": [],
        "created_at": stamp,
        "updated_at": stamp,
        "tags": [],
        "language": "en",
    }
    return MemoryRecord(**fields)
|
||
|
|
|
||
|
|
|
||
|
|
def _make_schema_hub(vec: list[float], text: str, pattern: str) -> MemoryRecord:
    """Build a semantic-tier ``MemoryRecord`` tagged as a draft schema.

    The record has a fresh id, ``detail_level`` 3, ``never_decay`` True and
    the tags ``"schema"``, ``"draft"`` and ``"pattern:<pattern>"``. Both
    timestamps are the same UTC "now".
    """
    stamp = datetime.now(timezone.utc)
    fields = {
        "id": uuid4(),
        "tier": "semantic",
        "literal_surface": text,
        "aaak_index": "",
        "embedding": list(vec),
        "community_id": None,
        "centrality": 0.0,
        "detail_level": 3,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": True,
        "never_merge": False,
        "provenance": [],
        "created_at": stamp,
        "updated_at": stamp,
        "tags": ["schema", "draft", f"pattern:{pattern}"],
        "language": "en",
    }
    return MemoryRecord(**fields)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Replace the ``keyring`` password functions with an in-memory dict.

    Applied automatically to every test in this module. Yields the backing
    dict, keyed by the two positional arguments passed to the keyring calls.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        return fake.pop((s, u), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attribute, stub in replacements:
        monkeypatch.setattr(_keyring, attribute, stub)
    yield fake
|
||
|
|
|
||
|
|
|
||
|
|
# Sizing of the seeded fixture: edges per hub, number of schema hubs, and
# number of verbatim records.
HUB_DEGREE = 8
HUB_COUNT = 10
VERBATIM_COUNT = 5

# Five distinct verbatim-style cues for the variance gate. The dispatch tests
# assert that each of them is classified as cue_mode == "verbatim".
# NOTE(review): the first three and the last cue contain an EN word-marker
# ("verbatim", "exact" or "quote"); the fourth contains none of these, so it
# presumably reaches the verbatim branch through a different classifier rule
# -- confirm against _classify_cue.
VERBATIM_CUES = [
    "verbatim recall the migration snapshot text",
    "exact phrase about pre-cleanup snapshot",
    "quote the deg_norm normalization fix",
    "what did the user say on day 17 about literal_preservation",
    "recall the \"schema_reinforced event payload\" exact wording",
]

# Record text paired position-by-position with VERBATIM_CUES. The seeding
# helper gives each record an embedding at cosine 0.85 to its cue's vector.
VERBATIM_TEXTS = [
    "verbatim record migration snapshot text content payload one",
    "verbatim record pre-cleanup snapshot phrase content payload two",
    "verbatim record deg_norm normalization fix content payload three",
    "verbatim record day 17 literal_preservation content payload four",
    "verbatim record schema_reinforced event payload exact wording five",
]
|
||
|
|
|
||
|
|
|
||
|
|
def _seed_5_verbatim_plus_10_hubs(tmp_path):
    """Seed a fresh store with the R5 acceptance fixture.

    Contents, in insertion order:
      * one episodic record per (cue, text) pair of VERBATIM_CUES /
        VERBATIM_TEXTS, embedded at cosine 0.85 to its cue's pinned vector;
      * HUB_COUNT schema-hub records, each immediately followed by
        HUB_DEGREE episodic distractor records. Hub and distractor
        embeddings come from the embedder's hash-derived fallback.

    All (hub, distractor) id pairs are then passed to ``store.boost_edges``
    and the runtime graph is built from the store.

    Returns:
        (store, embedder, graph, assignment, rich_club,
         verbatim_ids_per_cue dict, hub_ids list, cues list)
    """
    from iai_mcp.retrieve import build_runtime_graph
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path / "lancedb")
    embedder = _ControlledEmbedder()

    # Pin every cue to its own hash-derived vector, then place the matching
    # record at a known cosine from that vector.
    verbatim_ids_per_cue: dict[str, "uuid.UUID"] = {}
    for cue, text in zip(VERBATIM_CUES, VERBATIM_TEXTS):
        cue_vec = embedder.embed(cue)
        embedder.set_fixed(cue, cue_vec)
        matching = _make_episodic(_unit_vector_with_cosine(cue_vec, 0.85), text)
        store.insert(matching)
        verbatim_ids_per_cue[cue] = matching.id

    hub_ids: list = []
    edge_pairs: list = []
    for hub_no in range(HUB_COUNT):
        hub = _make_schema_hub(
            embedder.embed(f"schema-hub-{hub_no}-distinct-content"),
            f"schema hub record {hub_no}",
            pattern=f"hub:r5:{hub_no}",
        )
        store.insert(hub)
        hub_ids.append(hub.id)

        # Distractors are numbered globally: hub k owns k*HUB_DEGREE onwards.
        first_no = hub_no * HUB_DEGREE
        for distractor_no in range(first_no, first_no + HUB_DEGREE):
            distractor = _make_episodic(
                embedder.embed(f"r5-distractor-{distractor_no}"),
                f"distractor junk {distractor_no}",
            )
            store.insert(distractor)
            edge_pairs.append((hub.id, distractor.id))

    store.boost_edges(edge_pairs, edge_type="schema_instance_of", delta=1.0)

    graph, assignment, rich_club = build_runtime_graph(store)
    return (
        store,
        embedder,
        graph,
        assignment,
        rich_club,
        verbatim_ids_per_cue,
        hub_ids,
        VERBATIM_CUES,
    )
|
||
|
|
|
||
|
|
|
||
|
|
# ============================================================================
|
||
|
|
# Task 2 contract tests — RecallResponse defaults + signatures
|
||
|
|
# ============================================================================
|
||
|
|
|
||
|
|
|
||
|
|
def test_recall_response_back_compat_defaults():
    """A RecallResponse built from only the four legacy fields gets defaults.

    Expected defaults: cue_mode == 'concept' and patterns_observed == [].
    """
    from iai_mcp.types import RecallResponse

    legacy_fields = {
        "hits": [],
        "anti_hits": [],
        "activation_trace": [],
        "budget_used": 0,
    }
    response = RecallResponse(**legacy_fields)

    assert response.cue_mode == "concept", "cue_mode default must be 'concept' per D-03"
    assert response.patterns_observed == [], (
        "patterns_observed default must be [] per back-compat"
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_recall_for_response_signature_has_mode_kwarg_default_concept():
    """pipeline.recall_for_response exposes a ``mode`` parameter defaulting to 'concept'.

    Checked purely through the function signature; nothing is called.
    """
    import inspect
    from iai_mcp.pipeline import recall_for_response

    params = inspect.signature(recall_for_response).parameters
    assert "mode" in params, "recall_for_response must accept mode kwarg"

    mode_default = params["mode"].default
    assert mode_default == "concept", (
        f"recall_for_response mode default must be 'concept', "
        f"got {mode_default!r}"
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_retrieve_recall_signature_has_mode_kwarg_default_verbatim():
    """retrieve.recall exposes a ``mode`` parameter defaulting to 'verbatim' (D-14).

    Checked purely through the function signature; nothing is called.
    """
    import inspect
    from iai_mcp.retrieve import recall

    params = inspect.signature(recall).parameters
    assert "mode" in params, "retrieve.recall must accept mode kwarg"

    mode_default = params["mode"].default
    assert mode_default == "verbatim", (
        f"retrieve.recall mode default must be 'verbatim' per D-14, "
        f"got {mode_default!r}"
    )
|
||
|
|
|
||
|
|
|
||
|
|
# ============================================================================
|
||
|
|
# Task 4 R5 acceptance tests — end-to-end verbatim mode
|
||
|
|
# ============================================================================
|
||
|
|
|
||
|
|
|
||
|
|
def test_verbatim_mode_response_carries_cue_mode_and_empty_patterns(tmp_path):
    """With mode='verbatim' the response reports cue_mode='verbatim' and
    carries empty patterns_observed and hints lists."""
    from iai_mcp.pipeline import recall_for_response

    store, embedder, graph, assignment, rich_club, _ids, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )

    call_kwargs = {
        "store": store,
        "graph": graph,
        "assignment": assignment,
        "rich_club": rich_club,
        "embedder": embedder,
        "cue": cues[0],
        "session_id": "r5_test",
        "mode": "verbatim",
    }
    reply = recall_for_response(**call_kwargs)

    assert reply.cue_mode == "verbatim", f"expected cue_mode='verbatim', got {reply.cue_mode!r}"
    assert reply.patterns_observed == [], (
        f"verbatim mode must emit no patterns_observed, got {reply.patterns_observed!r}"
    )
    assert reply.hints == [], (
        f"verbatim mode must emit no hints (S4/curiosity/schema all suppressed), "
        f"got {reply.hints!r}"
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_verbatim_mode_hits_are_episodic_only(tmp_path):
    """In verbatim mode, every hit is tier='episodic'. No schemas.

    The response must contain at least one hit; otherwise the per-hit checks
    below would pass vacuously and the test would prove nothing.
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_ids_per_cue, hub_ids, cues) = _seed_5_verbatim_plus_10_hubs(tmp_path)

    resp = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cues[0],
        session_id="r5_episodic", mode="verbatim",
    )
    # Guard against an empty result silently satisfying the loop below.
    assert resp.hits, "verbatim mode produced empty hits"

    hub_id_set = set(hub_ids)
    hit_ids = [r.record_id for r in resp.hits]
    for h in resp.hits:
        assert h.record_id not in hub_id_set, (
            f"verbatim mode must EXCLUDE schema hubs from hits[], "
            f"hub {h.record_id} appeared at position "
            f"{hit_ids.index(h.record_id)}"
        )
        # Re-read the record from the store to check its persisted tier.
        rec = store.get(h.record_id)
        assert rec is not None, f"unknown record id {h.record_id} in hits"
        assert rec.tier == "episodic", (
            f"verbatim mode hit {h.record_id} has tier {rec.tier!r}, expected 'episodic'"
        )
|
||
|
|
|
||
|
|
|
||
|
|
def test_verbatim_mode_five_cue_variance_window_position_1_to_3(tmp_path):
    """R5 variance gate over all seeded cues.

    For every cue, the record seeded to match it must appear in hits[] at
    index 0, 1 or 2 when recall_for_response is called with mode='verbatim'.
    All 5 cues must satisfy this.
    """
    from iai_mcp.pipeline import recall_for_response

    store, embedder, graph, assignment, rich_club, verbatim_ids_per_cue, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )
    shared_kwargs = {
        "store": store,
        "graph": graph,
        "assignment": assignment,
        "rich_club": rich_club,
        "embedder": embedder,
        "session_id": "r5_variance",
        "mode": "verbatim",
    }

    positions: list[int] = []
    for cue in cues:
        resp = recall_for_response(cue=cue, **shared_kwargs)
        verbatim_id = verbatim_ids_per_cue[cue]
        ids = [hit.record_id for hit in resp.hits]
        assert verbatim_id in ids, (
            f"cue {cue!r}: matching verbatim {verbatim_id} not in hits at all "
            f"(hits ids: {ids})"
        )
        pos = ids.index(verbatim_id)
        positions.append(pos)
        assert pos <= 2, (
            f"cue {cue!r}: verbatim landed at pos {pos}, must be in 0..2 window. "
            f"All hits: {[(str(h.record_id)[:8], h.score) for h in resp.hits]}"
        )

    # Reaching this point means no cue failed; confirm all five were run.
    assert len(positions) == 5
    print(f"R5 variance positions across 5 cues: {positions}")
|
||
|
|
|
||
|
|
|
||
|
|
def test_verbatim_mode_position_1_strict_on_diagnostic_cue(tmp_path):
    """R5 strict gate for the first cue: its matching record must be hits[0]."""
    from iai_mcp.pipeline import recall_for_response

    store, embedder, graph, assignment, rich_club, verbatim_ids_per_cue, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )

    cue = cues[0]
    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue,
        session_id="r5_strict",
        mode="verbatim",
    )
    verbatim_id = verbatim_ids_per_cue[cue]

    def _actual_position():
        # Only evaluated when the assertion below fails, for the message.
        hit_ids = [h.record_id for h in resp.hits]
        return hit_ids.index(verbatim_id) if verbatim_id in hit_ids else 'MISSING'

    assert resp.hits, "verbatim mode produced empty hits"
    assert resp.hits[0].record_id == verbatim_id, (
        f"verbatim record must be at hits[0] (position-1 strict), "
        f"got {resp.hits[0].record_id} at pos 0; "
        f"matching verbatim {verbatim_id} at pos "
        f"{_actual_position()}"
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_verbatim_mode_overrides_loose_knob_setting(tmp_path):
    """Verbatim mode still surfaces the matching record under a 'loose' knob.

    recall_for_response is called with mode='verbatim' and
    profile_state={'literal_preservation': 'loose'}. The matching record for
    the first cue must land at index 0..2, and no seeded hub id may appear
    anywhere in hits[].
    """
    from iai_mcp.pipeline import recall_for_response

    store, embedder, graph, assignment, rich_club, verbatim_ids_per_cue, hub_ids, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )

    cue = cues[0]
    # The 'loose' setting is the adversarial case here; verbatim mode is
    # expected to win regardless of it.
    loose_profile = {"literal_preservation": "loose"}
    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue,
        session_id="r5_override",
        mode="verbatim",
        profile_state=loose_profile,
    )

    verbatim_id = verbatim_ids_per_cue[cue]
    ids = [hit.record_id for hit in resp.hits]
    assert verbatim_id in ids, "verbatim record missing under loose knob + verbatim mode"
    pos = ids.index(verbatim_id)
    assert pos <= 2, (
        f"verbatim mode must beat loose knob setting; got pos {pos} (must be 0..2). "
        f"hits: {[(str(h.record_id)[:8], h.score) for h in resp.hits]}"
    )

    # None of the seeded hub ids may show up among the hits.
    forbidden = set(hub_ids)
    for h in resp.hits:
        assert h.record_id not in forbidden, (
            f"hub {h.record_id} leaked into hits despite verbatim mode override of loose knob"
        )
|
||
|
|
|
||
|
|
|
||
|
|
def test_concept_mode_default_preserves_phase_5_baseline(tmp_path):
    """Calling recall_for_response without ``mode`` yields cue_mode == 'concept'."""
    from iai_mcp.pipeline import recall_for_response

    store, embedder, graph, assignment, rich_club, _ids, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )

    # Deliberately omit the mode kwarg so the function's default applies.
    call_kwargs = {
        "store": store,
        "graph": graph,
        "assignment": assignment,
        "rich_club": rich_club,
        "embedder": embedder,
        "cue": cues[0],
        "session_id": "r5_default",
    }
    resp_default = recall_for_response(**call_kwargs)

    assert resp_default.cue_mode == "concept", (
        "recall_for_response default mode must be 'concept' per baseline"
    )
|
||
|
|
|
||
|
|
|
||
|
|
# ============================================================================
|
||
|
|
# Task 4 — R5 dispatch end-to-end tests (5-cue variance window via dispatch)
|
||
|
|
# ============================================================================
|
||
|
|
|
||
|
|
|
||
|
|
def test_dispatch_verbatim_5_cue_variance_window(tmp_path, monkeypatch):
    """R5 variance gate through core.dispatch('memory_recall').

    For each seeded cue the dispatch response must report
    cue_mode == 'verbatim' and list the matching record's id (as a string)
    at index 0..2 of response['hits']. All 5 cues must satisfy this.
    """
    from iai_mcp import core
    from iai_mcp import embed as _embed_mod

    store, embedder, _graph, _assignment, _rich_club, verbatim_ids_per_cue, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )
    # Make dispatch use the controlled embedder for this store.
    monkeypatch.setattr(_embed_mod, "embedder_for_store", lambda _store: embedder)

    positions: list[int] = []
    for cue in cues:
        payload = {
            "cue": cue,
            "session_id": "r5_dispatch_variance",
            "cue_embedding": embedder.embed(cue),
        }
        response = core.dispatch(store, "memory_recall", payload)
        assert response["cue_mode"] == "verbatim", (
            f"cue {cue!r} should classify to verbatim, got {response['cue_mode']!r}"
        )

        # Dispatch returns record ids as strings, so compare in string form.
        verbatim_id = str(verbatim_ids_per_cue[cue])
        ids = [hit["record_id"] for hit in response["hits"]]
        assert verbatim_id in ids, (
            f"cue {cue!r}: matching verbatim {verbatim_id} missing from dispatch response. "
            f"hits ids: {ids}"
        )
        pos = ids.index(verbatim_id)
        positions.append(pos)
        assert pos <= 2, (
            f"cue {cue!r}: dispatch verbatim landed at pos {pos}, must be in 0..2 window"
        )

    # Reaching this point means no cue failed; confirm all five were run.
    assert len(positions) == 5
    print(f"R5 dispatch variance positions across 5 cues: {positions}")
|
||
|
|
|
||
|
|
|
||
|
|
def test_dispatch_verbatim_position_1_strict_diagnostic_cue(tmp_path, monkeypatch):
    """R5 strict gate through core.dispatch: the first cue's matching record
    must be response['hits'][0]."""
    from iai_mcp import core
    from iai_mcp import embed as _embed_mod

    store, embedder, _graph, _assignment, _rich_club, verbatim_ids_per_cue, _hubs, cues = (
        _seed_5_verbatim_plus_10_hubs(tmp_path)
    )
    # Make dispatch use the controlled embedder for this store.
    monkeypatch.setattr(_embed_mod, "embedder_for_store", lambda _store: embedder)

    cue = cues[0]
    payload = {
        "cue": cue,
        "session_id": "r5_dispatch_strict",
        "cue_embedding": embedder.embed(cue),
    }
    response = core.dispatch(store, "memory_recall", payload)

    assert response["cue_mode"] == "verbatim"
    assert response["hits"], "dispatch produced empty hits for verbatim cue"

    # Dispatch returns record ids as strings, so compare in string form.
    verbatim_id = str(verbatim_ids_per_cue[cue])
    assert response["hits"][0]["record_id"] == verbatim_id, (
        f"verbatim must be at hits[0] (position-1 strict via dispatch); "
        f"got {response['hits'][0]['record_id']} at pos 0"
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_dispatch_verbatim_overrides_loose_knob_setting(tmp_path, monkeypatch):
    """Verbatim mode via dispatch overrides loose literal_preservation knob.

    The knob is set on iai_mcp.core._profile_state with
    ``monkeypatch.setitem`` so pytest restores the exact prior state at
    teardown: the old value if the key existed, or removal of the key if it
    did not. A manual save/restore with a fallback default would instead
    leave a new key behind for later tests.
    """
    from iai_mcp import core
    from iai_mcp import embed as _embed_mod

    (store, embedder, graph, assignment, rich_club,
     verbatim_ids_per_cue, hub_ids, cues) = _seed_5_verbatim_plus_10_hubs(tmp_path)
    monkeypatch.setattr(_embed_mod, "embedder_for_store", lambda _store: embedder)

    # Set the knob to 'loose' (would let hubs lead under concept mode).
    monkeypatch.setitem(core._profile_state, "literal_preservation", "loose")

    cue = cues[0]
    response = core.dispatch(
        store, "memory_recall",
        {"cue": cue, "session_id": "r5_dispatch_override",
         "cue_embedding": embedder.embed(cue)},
    )
    assert response["cue_mode"] == "verbatim"

    # Dispatch returns record ids as strings, so compare in string form.
    verbatim_id = str(verbatim_ids_per_cue[cue])
    ids = [h["record_id"] for h in response["hits"]]
    assert verbatim_id in ids, "verbatim missing under loose knob + verbatim cue"
    pos = ids.index(verbatim_id)
    assert pos <= 2, (
        f"verbatim mode via dispatch must override loose knob; got pos {pos}"
    )

    # No hubs leaked through.
    hub_id_strs = {str(h) for h in hub_ids}
    for h in response["hits"]:
        assert h["record_id"] not in hub_id_strs, (
            f"hub {h['record_id']} leaked despite verbatim mode + loose knob"
        )
|