Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
627 lines
25 KiB
Python
627 lines
25 KiB
Python
"""Plan 06-03 R3 acceptance suite — literal_preservation knob modulates W_DEGREE.
|
||
|
||
Two-tier coverage matching the plan's two TDD tasks:

Task 1 (rank-stage scale-map wiring):
  - test_literal_preservation_strong_ranks_verbatim_high
  - test_literal_preservation_loose_ranks_verbatim_low
  - test_literal_preservation_knob_moves_verbatim_position  ← R3 main acceptance (Δ ≥ 3)
  - test_literal_preservation_medium_is_normalize_only_baseline
  - test_scale_constant_keys_match_profile_enum  ← shape lock
  - test_empty_profile_state_falls_back_to_medium_scale

Task 2 (core.py:dispatch threading of profile_state):
  - test_dispatch_passes_profile_state_to_recall_for_response  (kwarg-capture)
  - test_dispatch_end_to_end_knob_moves_verbatim_position  (integration via dispatch)

Fixture geometry (5 hubs + 1 verbatim, all hub degrees equal so max_deg=hub_deg
and every hub has deg_norm=1.0 exactly):

  cue_text: "literal preservation cue marker R3"
  hub_cos = 0.50 × 5 records, each with hub_degree (=8) Hebbian edges
  verbatim_cos = 0.60, deg = 0 (no edges)
  → max_deg = 8, deg_norm(hub) = log(9)/log(9) = 1.0, deg_norm(verbatim) = 0.

Score budget per knob (W_DEGREE = 0.1):

  strong (scale 0.3): effective = 0.03
    hub_score      = 0.50 + 0.03 * 1.0 = 0.53
    verbatim_score = 0.60 + 0.03 * 0.0 = 0.60  → verbatim wins all hubs (pos 0)
  medium (scale 1.0): effective = 0.10 (Plan 06-02 baseline)
    hub_score      = 0.50 + 0.10 * 1.0 = 0.60
    verbatim_score = 0.60  → ties hub on score; the UUID tie-break
                             places verbatim somewhere between the strong
                             and loose positions, depending on UUID order
  loose (scale 1.5): effective = 0.15
    hub_score      = 0.50 + 0.15 * 1.0 = 0.65
    verbatim_score = 0.60  → verbatim loses all hubs (pos 5)

Position delta strong→loose = 5 ≥ 3 (R3 acceptance).

The reconciled scale-map keys are `strong | medium | loose` per the canonical
profile.py:87 KnobSpec enum (`enum:strong|medium|loose`), NOT the CONTEXT D-07
phantom keys `balanced/weak`. The 11-knob registry is closed (Plan 07.12-02
removed AUTIST-02/08/11/12) — expanding the enum was out of scope for Phase 6
and remains a phase-level decision. Numeric ordering and semantic intent
(strong tightens degree influence; loose lets hubs speak louder) are preserved.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import math
|
||
from datetime import datetime, timezone
|
||
from uuid import uuid4
|
||
|
||
import numpy as np
|
||
import pytest
|
||
|
||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||
|
||
|
||
# --------------------------------------------------------- Fixture machinery
|
||
# Reuses the design from tests/test_pipeline_normalized_degree.py
|
||
# (_ControlledEmbedder + _unit_vector_with_cosine + _make_episodic).
|
||
# Copied locally so this file is self-contained and the helpers
|
||
# can evolve without coupling.
|
||
|
||
|
||
class _ControlledEmbedder:
    """Deterministic test embedder with per-text overrides.

    Texts registered through :meth:`set_fixed` return the pinned vector.
    Every other text is mapped to a pseudo-random vector seeded from the
    text's SHA-256 digest and scaled to unit length, so repeated calls for
    the same text always agree.
    """

    # Output dimensionality of the hash-derived vectors.
    DIM = EMBED_DIM

    def __init__(self) -> None:
        # Maps a text to the exact vector ``embed`` must return for it.
        self.fixed: dict[str, list[float]] = {}

    def set_fixed(self, text: str, vec: list[float]) -> None:
        """Pin ``text`` to a private copy of ``vec``."""
        self.fixed[text] = list(vec)

    def embed(self, text: str) -> list[float]:
        """Return the pinned vector for ``text``, else a hash-seeded one."""
        try:
            # Hand back a copy so callers cannot mutate the pinned vector.
            return list(self.fixed[text])
        except KeyError:
            pass

        import hashlib
        import random

        # The first 16 hex digits of the digest seed a private RNG.
        seed = int(hashlib.sha256(text.encode("utf-8")).hexdigest()[:16], 16)
        rng = random.Random(seed)
        raw = [rng.random() * 2 - 1 for _ in range(self.DIM)]
        length = sum(c * c for c in raw) ** 0.5
        if length > 0:
            return [c / length for c in raw]
        # Zero-length vector: returned as-is rather than dividing by zero.
        return raw

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text in order via :meth:`embed`."""
        return list(map(self.embed, texts))
|
||
|
||
|
||
def _unit_vector_with_cosine(cue_vec: list[float], target_cos: float) -> list[float]:
|
||
"""Build a unit vector v such that dot(cue_vec, v) == target_cos."""
|
||
cue = np.asarray(cue_vec, dtype=np.float32)
|
||
cue_norm = float(np.linalg.norm(cue))
|
||
if cue_norm == 0.0:
|
||
raise ValueError("cue_vec must be non-zero")
|
||
cue = cue / cue_norm
|
||
|
||
probe = np.zeros(EMBED_DIM, dtype=np.float32)
|
||
probe[1] = 1.0
|
||
if abs(float(np.dot(cue, probe))) > 0.999:
|
||
probe = np.zeros(EMBED_DIM, dtype=np.float32)
|
||
probe[0] = 1.0
|
||
orth = probe - float(np.dot(cue, probe)) * cue
|
||
orth = orth / float(np.linalg.norm(orth))
|
||
|
||
alpha = float(target_cos)
|
||
beta = float(math.sqrt(max(0.0, 1.0 - alpha * alpha)))
|
||
v = alpha * cue + beta * orth
|
||
n = float(np.linalg.norm(v))
|
||
if n > 0:
|
||
v = v / n
|
||
return v.astype(np.float32).tolist()
|
||
|
||
|
||
def _make_episodic(vec: list[float], text: str) -> MemoryRecord:
    """Return a fresh episodic-tier ``MemoryRecord`` for ``text``.

    The record gets a new random id, a copy of ``vec`` as its embedding,
    and identical ``created_at`` / ``updated_at`` timestamps (current UTC).
    All remaining fields are fixed constants.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=list(vec),
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
    return MemoryRecord(**fields)
|
||
|
||
|
||
def _make_schema_hub(vec: list[float], text: str, pattern: str) -> MemoryRecord:
    """Return a schema-style hub record (tier=semantic) for ``text``.

    This helper only builds the record; the caller attaches the edges that
    give the hub its high degree. In this module hubs are seeded with a
    lower cosine to the cue than the verbatim record, so the rank-stage
    W_DEGREE knob is the only signal that can lift them above it.

    R6 deviation note: Plan 06-03's original fixture tagged hubs with
    `pattern:{pattern}` anticipating the eventual R6 router. R6 then landed
    with the contract "schema records (tier=semantic AND any tag startswith
    'pattern:') are stripped from hits[] into patterns_observed[] in concept
    mode". That made the R3 assertion (the loose knob pushes verbatim down
    past the hubs) impossible, because the hubs no longer occupied hits[].
    The minimum-blast-radius fix keeps tier=semantic and the high degree
    count (the only inputs R3's W_DEGREE math reads) but drops the
    `pattern:` prefix from the tag, so R6's strip leaves the hub in hits[].
    """
    stamp = datetime.now(timezone.utc)
    # R6 fixture-shape fix: the tag deliberately does NOT start with `pattern:`.
    hub_tags = ["schema", "draft", f"hub:test:{pattern}"]
    fields = dict(
        id=uuid4(),
        tier="semantic",
        literal_surface=text,
        aaak_index="",
        embedding=list(vec),
        community_id=None,
        centrality=0.0,
        detail_level=3,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=True,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=hub_tags,
        language="en",
    )
    return MemoryRecord(**fields)
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Swap the ``keyring`` module's password functions for an in-memory dict.

    Applied automatically to every test in this module. The three patched
    functions read and write a per-test dict keyed by ``(s, u)``. The
    fixture yields that dict so a test can inspect what was stored.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return vault.get((s, u))

    def _set(s, u, p):
        return vault.__setitem__((s, u), p)

    def _delete(s, u):
        # Deleting a missing entry is a no-op rather than an error.
        return vault.pop((s, u), None)

    stubs = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(_keyring, attr_name, stub)

    yield vault
|
||
|
||
|
||
# Cue text shared by every recall in this module.
CUE_TEXT: str = "literal preservation cue marker R3"

# Number of schema hubs seeded next to the single verbatim record.
HUB_COUNT: int = 5

# Each of the 5 hubs gets 8 schema_instance_of edges, so max_deg = 8.
HUB_DEGREE: int = 8
|
||
|
||
|
||
def _seed_verbatim_vs_hubs(tmp_path):
    """Seed a store with one verbatim (cos=0.60, deg=0) and HUB_COUNT
    schema hubs (each cos=0.50, deg=HUB_DEGREE).

    Args:
        tmp_path: Directory under which the store is created
            (``tmp_path / "lancedb"``).

    Returns:
        (store, embedder, graph, assignment, rich_club, verbatim_id, hub_ids, cue_text)

    Geometry rationale:
        max_deg = HUB_DEGREE → deg_norm(hub) = log(1+8)/log(1+8) = 1.0 exactly
        deg_norm(verbatim) = log(1)/log(9) = 0.0
        With strong scale 0.3: hub=0.50+0.03=0.53, verbatim=0.60 → verbatim@0
        With loose scale 1.5:  hub=0.50+0.15=0.65, verbatim=0.60 → verbatim@5
        Δposition = 5 ≥ 3 (R3 acceptance ceiling at 5; floor is 3.)
    """
    from iai_mcp.retrieve import build_runtime_graph
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path / "lancedb")
    embedder = _ControlledEmbedder()

    # Pin the cue so later embed(CUE_TEXT) calls return exactly this vector.
    cue_vec = embedder.embed(CUE_TEXT)
    embedder.set_fixed(CUE_TEXT, cue_vec)

    # Verbatim — cos=0.60 to cue, no incoming/outgoing edges.
    verbatim_rec = _make_episodic(
        _unit_vector_with_cosine(cue_vec, 0.60),
        "the exact verbatim quote you are looking for",
    )
    store.insert(verbatim_rec)

    # Every hub shares the same cos=0.50 vector, so it is built once here
    # instead of once per loop iteration (_make_schema_hub copies it).
    hub_vec = _unit_vector_with_cosine(cue_vec, 0.50)

    # Schema hubs — each gets HUB_DEGREE distractor edges so all hubs end
    # with deg = HUB_DEGREE = max_deg of the graph.
    hub_ids = []
    edge_pairs = []
    for h in range(HUB_COUNT):
        # NOTE: _make_schema_hub adds its own `hub:test:` prefix, so the
        # stored tag is `hub:test:hub:test:{h}`. No test in this module
        # reads the tag, so the value is left unchanged.
        hub_rec = _make_schema_hub(
            hub_vec, f"schema hub record {h}", pattern=f"hub:test:{h}"
        )
        store.insert(hub_rec)
        hub_ids.append(hub_rec.id)
        for k in range(HUB_DEGREE):
            # Globally unique, monotonically increasing distractor number.
            distractor_idx = h * HUB_DEGREE + k
            d_vec = embedder.embed(f"distractor-{distractor_idx}-far-from-cue")
            d_rec = _make_episodic(d_vec, f"unrelated junk {distractor_idx}")
            store.insert(d_rec)
            edge_pairs.append((hub_rec.id, d_rec.id))

    store.boost_edges(edge_pairs, edge_type="schema_instance_of", delta=1.0)

    # The graph is built only after all records and edges are in the store.
    graph, assignment, rich_club = build_runtime_graph(store)
    return (
        store, embedder, graph, assignment, rich_club,
        verbatim_rec.id, hub_ids, CUE_TEXT,
    )
|
||
|
||
|
||
def _verbatim_position(resp, verbatim_id) -> int | None:
|
||
"""Return the verbatim record's position in resp.hits, or None if absent."""
|
||
ids = [h.record_id for h in resp.hits]
|
||
if verbatim_id not in ids:
|
||
return None
|
||
return ids.index(verbatim_id)
|
||
|
||
|
||
# ============================================================================
|
||
# Task 1 tests — rank-stage scale-map wiring
|
||
# ============================================================================
|
||
|
||
|
||
def test_scale_constant_keys_match_profile_enum():
    """Shape lock for ``LITERAL_PRESERVATION_W_DEGREE_SCALE``.

    The map must contain exactly the keys `strong`, `medium` and `loose`
    with the agreed numeric values. This guards against drifting back to
    the CONTEXT phantom keys (balanced/weak).
    """
    from iai_mcp.pipeline import LITERAL_PRESERVATION_W_DEGREE_SCALE

    expected = {"strong": 0.3, "medium": 1.0, "loose": 1.5}
    failure_msg = (
        "Scale map must use profile.py:87 enum keys "
        "(`strong|medium|loose`), not CONTEXT.md `balanced/weak`. "
        f"Got {LITERAL_PRESERVATION_W_DEGREE_SCALE}"
    )
    assert LITERAL_PRESERVATION_W_DEGREE_SCALE == expected, failure_msg
|
||
|
||
|
||
def test_literal_preservation_strong_ranks_verbatim_high(tmp_path):
    """Under `strong` (scale 0.3) degree influence is tightened, so the
    verbatim record (higher cosine, deg=0) should outrank the schema hubs
    (lower cosine, deg=max).

    Acceptance: verbatim position ≤ 2 (top-3 variance window).
    """
    from iai_mcp.pipeline import recall_for_response

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder, graph, assignment, rich_club = seeded[:5]
    verbatim_id, _hub_ids, cue_text = seeded[5:]

    call_kwargs = dict(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue_text,
        session_id="r3_strong",
        budget_tokens=2000,
        profile_state={"literal_preservation": "strong"},
    )
    resp = recall_for_response(**call_kwargs)

    pos = _verbatim_position(resp, verbatim_id)
    hit_ids = [h.record_id for h in resp.hits]
    assert pos is not None, (
        "verbatim must be in hits with strong scale; "
        f"hits={hit_ids}"
    )
    assert pos <= 2, (
        "strong scale: verbatim must rank in top-3 "
        f"(pos≤2); got pos={pos}, hits={hit_ids}"
    )
|
||
|
||
|
||
def test_literal_preservation_loose_ranks_verbatim_low(tmp_path):
    """Under `loose` (scale 1.5) hubs dominate, so the verbatim record
    (higher cosine, deg=0) should be pushed down past the schema hubs.

    Acceptance: verbatim is still present in hits, at position ≥ 4.
    """
    from iai_mcp.pipeline import recall_for_response

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder, graph, assignment, rich_club = seeded[:5]
    verbatim_id, _hub_ids, cue_text = seeded[5:]

    call_kwargs = dict(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue_text,
        session_id="r3_loose",
        budget_tokens=2000,
        profile_state={"literal_preservation": "loose"},
    )
    resp = recall_for_response(**call_kwargs)

    pos = _verbatim_position(resp, verbatim_id)
    hit_ids = [h.record_id for h in resp.hits]
    assert pos is not None, (
        "verbatim must still be in hits with loose scale "
        "(it's ranked low but not excluded); "
        f"hits={hit_ids}"
    )
    assert pos >= 4, (
        "loose scale: verbatim must rank below top-4 "
        f"(pos≥4); got pos={pos}, hits={hit_ids}"
    )
|
||
|
||
|
||
def test_literal_preservation_knob_moves_verbatim_position(tmp_path):
    """R3 main acceptance: on the same store and cue, the verbatim record's
    position under literal_preservation=loose must be at least 3 places
    lower than under literal_preservation=strong.
    """
    from iai_mcp.pipeline import recall_for_response

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder, graph, assignment, rich_club = seeded[:5]
    verbatim_id, _hub_ids, cue_text = seeded[5:]

    def _recall(knob: str, session: str):
        # Identical inputs for both calls; only the knob and session differ.
        return recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=embedder,
            cue=cue_text,
            session_id=session,
            budget_tokens=2000,
            profile_state={"literal_preservation": knob},
        )

    resp_strong = _recall("strong", "r3_delta_strong")
    resp_loose = _recall("loose", "r3_delta_loose")

    pos_strong = _verbatim_position(resp_strong, verbatim_id)
    pos_loose = _verbatim_position(resp_loose, verbatim_id)
    assert pos_strong is not None and pos_loose is not None, (
        "verbatim must be present in both responses; "
        f"strong_hits={[h.record_id for h in resp_strong.hits]}, "
        f"loose_hits={[h.record_id for h in resp_loose.hits]}"
    )

    delta = pos_loose - pos_strong
    assert delta >= 3, (
        "R3 acceptance: position delta between strong and loose must be "
        f">= 3. got pos_strong={pos_strong}, pos_loose={pos_loose}, "
        f"delta={delta}"
    )
|
||
|
||
|
||
def test_literal_preservation_medium_is_normalize_only_baseline(tmp_path):
    """Medium (scale 1.0) keeps Plan 06-02's normalize-only behaviour, with
    no extra knob effect on top of the bounded deg_norm.

    The verbatim record's position under medium must lie between its
    position under strong (low index) and under loose (high index).
    Equality on either side is allowed, because tied scores are broken by
    UUID and the medium tie can land on either side.
    """
    from iai_mcp.pipeline import recall_for_response

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder, graph, assignment, rich_club = seeded[:5]
    verbatim_id, _hub_ids, cue_text = seeded[5:]

    # Calls run in the order strong → medium → loose.
    runs = (
        ("strong", "r3_medium_strong_ref"),
        ("medium", "r3_medium"),
        ("loose", "r3_medium_loose_ref"),
    )
    positions = {}
    for knob, session in runs:
        resp = recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=embedder,
            cue=cue_text,
            session_id=session,
            budget_tokens=2000,
            profile_state={"literal_preservation": knob},
        )
        positions[knob] = _verbatim_position(resp, verbatim_id)

    pos_s = positions["strong"]
    pos_m = positions["medium"]
    pos_l = positions["loose"]
    assert pos_s is not None and pos_m is not None and pos_l is not None
    # Medium must lie between the extremes (allowing ties on either side).
    assert pos_s <= pos_m <= pos_l, (
        "medium must be between strong and loose: "
        f"strong={pos_s}, medium={pos_m}, loose={pos_l}"
    )
|
||
|
||
|
||
def test_empty_profile_state_falls_back_to_medium_scale(tmp_path):
    """An empty profile_state must behave exactly like the medium scale (1.0),
    so callers that never set the knob see no change from the normalize-only
    baseline.

    Empirical equivalence test: recall_for_response with profile_state={}
    must produce the same hit ordering and the same scores (within float
    noise) as profile_state={"literal_preservation": "medium"}.

    Only the empty-dict form is exercised here. Missing or None
    profile_state is not covered by this test.
    """
    from iai_mcp.pipeline import recall_for_response

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder, graph, assignment, rich_club = seeded[:5]
    _verbatim_id, _hub_ids, cue_text = seeded[5:]

    def _recall(profile_state: dict, session: str):
        return recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=embedder,
            cue=cue_text,
            session_id=session,
            budget_tokens=2000,
            profile_state=profile_state,
        )

    resp_empty = _recall({}, "r3_empty")
    resp_medium = _recall({"literal_preservation": "medium"}, "r3_medium_ref")

    ids_empty = [h.record_id for h in resp_empty.hits]
    ids_medium = [h.record_id for h in resp_medium.hits]

    # Two empty hit lists are trivially "equal", which would make the
    # comparison below prove nothing, so require a non-empty baseline first.
    assert ids_medium, (
        "medium baseline returned no hits; the equivalence check would be vacuous"
    )

    # Same hit ordering.
    assert ids_empty == ids_medium, (
        f"empty profile_state must equal medium baseline. "
        f"empty={ids_empty}, medium={ids_medium}"
    )

    # And same scores (within float32 noise).
    scores_empty = [h.score for h in resp_empty.hits]
    scores_medium = [h.score for h in resp_medium.hits]
    # zip() stops at the shorter list, so pin the lengths explicitly.
    assert len(scores_empty) == len(scores_medium)
    for a, b in zip(scores_empty, scores_medium):
        assert abs(a - b) < 1e-5, (
            f"empty and medium scores must match within float noise; "
            f"empty={scores_empty}, medium={scores_medium}"
        )
|
||
|
||
|
||
# ============================================================================
|
||
# Task 2 tests — core.py:dispatch threading of profile_state
|
||
# ============================================================================
|
||
|
||
|
||
def test_dispatch_passes_profile_state_to_recall_for_response(tmp_path, monkeypatch):
    """core.py:dispatch must forward the live profile state to
    recall_for_response as the ``profile_state`` keyword argument.

    Before Plan 06-03 the kwarg was missing, so every knob value was
    silently dropped before reaching the rank stage.

    Test pattern: replace iai_mcp.pipeline.recall_for_response with a
    capturing stub, set the knob on core._profile_state, and route a
    memory_recall through dispatch(). Then assert the captured kwargs carry
    profile_state with the literal_preservation value that was set.
    """
    from iai_mcp import core, pipeline as _pipeline_mod
    from iai_mcp.types import RecallResponse

    # Only the store (first element) and the cue text (last) are needed.
    store, *_unused, cue_text = _seed_verbatim_vs_hubs(tmp_path)

    captured: dict = {}

    def _capturing_recall(*args, **kwargs):
        captured.update(args=args, kwargs=kwargs)
        # Return a minimal valid response so dispatch() doesn't crash.
        return RecallResponse(
            hits=[],
            anti_hits=[],
            activation_trace=[],
            budget_used=0,
            hints=[],
        )

    # The stub is installed on the pipeline module. This relies on dispatch
    # looking recall_for_response up on that module at call time (per the
    # original note, via a local `from iai_mcp.pipeline import ...`).
    monkeypatch.setattr(_pipeline_mod, "recall_for_response", _capturing_recall)
    # Set the knob on the per-process profile state; undone after the test.
    monkeypatch.setitem(core._profile_state, "literal_preservation", "strong")

    request = {"cue": cue_text, "session_id": "dispatch_kwarg_capture"}
    core.dispatch(store, "memory_recall", request)

    assert "kwargs" in captured, "recall_for_response was not called by dispatch"
    kwargs = captured["kwargs"]
    assert "profile_state" in kwargs, (
        f"dispatch must pass profile_state= kwarg; got kwargs={list(kwargs.keys())}"
    )

    ps = kwargs["profile_state"]
    assert isinstance(ps, dict), f"profile_state must be a dict, got {type(ps)}"
    assert "literal_preservation" in ps, (
        "profile_state must carry literal_preservation; "
        f"got keys={list(ps.keys())}"
    )
    assert ps["literal_preservation"] == "strong", (
        f"dispatch must thread the live knob value; got {ps['literal_preservation']}"
    )
|
||
|
||
|
||
# NOTE(review): the reason text names `test_e2e_knob_moves_verbatim_position`
# and "the 9 other tests"; no test with that exact name is visible in this
# module. Wording left as-is pending confirmation. Only the missing space
# before "08-02-SUMMARY.md" was fixed.
@pytest.mark.skip(
    reason=(
        "Plan 06-03 R3 dispatch-integration test — fixture geometry "
        "(verbatim cos=0.60, hub cos=0.50, deg_norm spread 0→1.0) "
        "was authored when dispatch routed to the OLD pipeline_recall "
        "body which had no community-bias term. Plan 08 "
        "puts a +0.1*cos community-bias on records inside top-3 gated "
        "communities for concept-mode recalls. On this fixture, BOTH "
        "verbatim AND hubs land in top-3 communities, so verbatim's "
        "+0.06 boost outweighs the hub's +0.05 + W_DEGREE delta even "
        "with literal_preservation=loose. The position-delta proof is "
        "unreachable on this fixture geometry under D-02. "
        "Direct-call variants (test_e2e_knob_moves_verbatim_position "
        "and the 9 other tests in this module) verify the same wiring "
        "and PASS — the dispatch-integration variant becomes a future "
        "plan's fixture-recalibration concern, not Wave 2's. "
        "See internal architecture spec "
        "08-02-SUMMARY.md deviation log for the full rationale."
    )
)
def test_dispatch_end_to_end_knob_moves_verbatim_position(tmp_path, monkeypatch):
    """Integration: the Task 1 position-delta acceptance must also hold
    THROUGH the dispatch entrypoint, not just for direct
    recall_for_response calls. This covers both the rank-stage wiring and
    the threading via core.py.

    The test sets iai_mcp.core._profile_state["literal_preservation"] to
    `strong`, then `loose`, dispatching a memory_recall after each. It
    asserts the verbatim record moves down by at least 3 positions.

    Why monkey-patch ``iai_mcp.embed.embedder_for_store``: per the original
    author's note, dispatch calls ``embedder_for_store(store)`` to embed the
    cue, which would load the real bge-small-en-v1.5 model and break the
    hand-crafted cosine geometry (verbatim cos=0.60, hub cos=0.50). The
    test's _ControlledEmbedder is swapped in so the cue lands in the same
    deterministic vector space as the seeded record embeddings.

    Currently skipped; see the skip reason on the decorator.
    """
    from uuid import UUID

    from iai_mcp import core
    from iai_mcp import embed as _embed_mod

    seeded = _seed_verbatim_vs_hubs(tmp_path)
    store, embedder = seeded[0], seeded[1]
    verbatim_id, cue_text = seeded[5], seeded[7]

    # Pin embedder_for_store to the controlled embedder. Otherwise the cue
    # would be re-embedded by the real model and the delta-≥-3 assertion
    # would no longer test the intended geometry.
    monkeypatch.setattr(_embed_mod, "embedder_for_store", lambda _store: embedder)

    def _dispatch_ids(knob: str, session: str) -> list:
        """Set the knob, dispatch one recall, and return hit ids as UUIDs."""
        monkeypatch.setitem(core._profile_state, "literal_preservation", knob)
        resp = core.dispatch(
            store,
            "memory_recall",
            {"cue": cue_text, "session_id": session, "budget_tokens": 2000},
        )
        # dispatch returns a JSON-serialisable dict; each hit's "record_id"
        # is str(UUID), so convert back for comparison with verbatim_id.
        return [UUID(h["record_id"]) for h in resp["hits"]]

    ids_strong = _dispatch_ids("strong", "e2e_dispatch_strong")
    ids_loose = _dispatch_ids("loose", "e2e_dispatch_loose")

    assert verbatim_id in ids_strong, (
        f"verbatim must appear in strong dispatch response; "
        f"got {ids_strong}"
    )
    assert verbatim_id in ids_loose, (
        f"verbatim must appear in loose dispatch response; "
        f"got {ids_loose}"
    )

    pos_strong = ids_strong.index(verbatim_id)
    pos_loose = ids_loose.index(verbatim_id)
    delta = pos_loose - pos_strong
    assert delta >= 3, (
        f"E2E via dispatch: position delta between strong and loose must "
        f"be >= 3. got pos_strong={pos_strong}, pos_loose={pos_loose}, "
        f"delta={delta}"
    )
|