iai-mcp-opencode/tests/test_pipeline_knob_modulates_w_degree.py

"""Plan 06-03 R3 acceptance suite — literal_preservation knob modulates W_DEGREE.

Two-tier coverage matching the plan's two TDD tasks:

  Task 1 (rank-stage scale-map wiring):
    - test_literal_preservation_strong_ranks_verbatim_high
    - test_literal_preservation_loose_ranks_verbatim_low
    - test_literal_preservation_knob_moves_verbatim_position    ← R3 main acceptance (Δ ≥ 3)
    - test_literal_preservation_medium_is_normalize_only_baseline
    - test_scale_constant_keys_match_profile_enum               ← shape lock
    - test_empty_profile_state_falls_back_to_medium_scale

  Task 2 (core.py:dispatch threading of profile_state):
    - test_dispatch_passes_profile_state_to_recall_for_response     (kwarg-capture)
    - test_dispatch_end_to_end_knob_moves_verbatim_position     (integration via dispatch)

Fixture geometry (5 hubs + 1 verbatim, all degrees equal so max_deg=hub_deg
and every hub has deg_norm=1.0 exactly):

  cue_text:        "literal preservation cue marker R3"
  hub_cos = 0.50   × 5 records, each with hub_degree (=8) Hebbian edges
  verbatim_cos = 0.60, deg = 0 (no edges)
  → max_deg = 8, deg_norm(hub) = log(9)/log(9) = 1.0, deg_norm(verbatim) = 0.

Score budget per knob (W_DEGREE = 0.1):
  strong (scale 0.3):  effective = 0.03
    hub_score      = 0.50 + 0.03 * 1.0 = 0.53
    verbatim_score = 0.60 + 0.03 * 0.0 = 0.60   → verbatim wins all hubs (pos 0)
  medium (scale 1.0):  effective = 0.10  (Plan 06-02 baseline)
    hub_score      = 0.50 + 0.10 * 1.0 = 0.60
    verbatim_score = 0.60                        → ties hub on score; UUID tie-break
                                                   places between depending on UUID order
  loose  (scale 1.5):  effective = 0.15
    hub_score      = 0.50 + 0.15 * 1.0 = 0.65
    verbatim_score = 0.60                        → verbatim loses all hubs (pos 5)

Position delta strong→loose = 5 ≥ 3 (R3 acceptance).

The reconciled scale-map keys are `strong | medium | loose` per the canonical
profile.py:87 KnobSpec enum (`enum:strong|medium|loose`), NOT the CONTEXT D-07
phantom keys `balanced/weak`. The 11-knob registry is closed (Plan 07.12-02
removed AUTIST-02/08/11/12) — expanding the enum was out of scope for Phase 6
and remains a phase-level decision. Numeric ordering and semantic intent
(strong tightens degree influence; loose lets hubs speak louder) are preserved.
"""
from __future__ import annotations

import math
from datetime import datetime, timezone
from uuid import uuid4

import numpy as np
import pytest

from iai_mcp.types import EMBED_DIM, MemoryRecord


# --------------------------------------------------------- Fixture machinery
# Reuses the design from tests/test_pipeline_normalized_degree.py
# (_ControlledEmbedder + _unit_vector_with_cosine + _make_episodic).
# Copied locally so this file is self-contained and the helpers
# can evolve without coupling.


class _ControlledEmbedder:
    """Embedder whose output for a given text is deterministic AND
    overridable. ``self.fixed`` maps cue text → 384d unit vector; any
    other text falls through to a sha256-derived vector for parity with
    the seed-time hash path used elsewhere in the suite.
    """

    DIM = EMBED_DIM

    def __init__(self) -> None:
        self.fixed: dict[str, list[float]] = {}

    def set_fixed(self, text: str, vec: list[float]) -> None:
        self.fixed[text] = list(vec)

    def embed(self, text: str) -> list[float]:
        if text in self.fixed:
            return list(self.fixed[text])
        import hashlib
        import random
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        rng = random.Random(int(digest[:16], 16))
        v = [rng.random() * 2 - 1 for _ in range(self.DIM)]
        norm = sum(x * x for x in v) ** 0.5
        return [x / norm for x in v] if norm > 0 else v

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        return [self.embed(t) for t in texts]


def _unit_vector_with_cosine(cue_vec: list[float], target_cos: float) -> list[float]:
    """Build a unit vector v such that dot(cue_vec, v) == target_cos."""
    cue = np.asarray(cue_vec, dtype=np.float32)
    cue_norm = float(np.linalg.norm(cue))
    if cue_norm == 0.0:
        raise ValueError("cue_vec must be non-zero")
    cue = cue / cue_norm

    probe = np.zeros(EMBED_DIM, dtype=np.float32)
    probe[1] = 1.0
    if abs(float(np.dot(cue, probe))) > 0.999:
        probe = np.zeros(EMBED_DIM, dtype=np.float32)
        probe[0] = 1.0
    orth = probe - float(np.dot(cue, probe)) * cue
    orth = orth / float(np.linalg.norm(orth))

    alpha = float(target_cos)
    beta = float(math.sqrt(max(0.0, 1.0 - alpha * alpha)))
    v = alpha * cue + beta * orth
    n = float(np.linalg.norm(v))
    if n > 0:
        v = v / n
    return v.astype(np.float32).tolist()


def _make_episodic(vec: list[float], text: str) -> MemoryRecord:
    now = datetime.now(timezone.utc)
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=list(vec),
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=now,
        updated_at=now,
        tags=[],
        language="en",
    )


def _make_schema_hub(vec: list[float], text: str, pattern: str) -> MemoryRecord:
    """Schema-style hub fixture — tier=semantic + high-degree edges. Used
    here as a high-cosine-but-low-cosine-vs-verbatim foil so the rank-stage
    W_DEGREE knob is the only modulating signal.

    R6 deviation note: Plan 06-03's original fixture tagged hubs
    with `pattern:{pattern}` anticipating the eventual R6 router. R6 then
    LANDED with the contract "schema records (tier=semantic AND any tag
    startswith 'pattern:') are stripped from hits[] into patterns_observed[]
    in concept mode" — which made the R3 assertion (loose knob displaces
    verbatim down past hubs) impossible because the hubs no longer occupied
    hits[]. The minimum-blast-radius fix is to keep tier=semantic + the high
    degree count (the only inputs R3's W_DEGREE math actually reads) but
    drop the `pattern:` prefix from the tag so R6's strip leaves the hub
    in hits[]. R3's testable invariant is preserved verbatim.
    """
    now = datetime.now(timezone.utc)
    return MemoryRecord(
        id=uuid4(),
        tier="semantic",
        literal_surface=text,
        aaak_index="",
        embedding=list(vec),
        community_id=None,
        centrality=0.0,
        detail_level=3,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=True,
        never_merge=False,
        provenance=[],
        created_at=now,
        updated_at=now,
        # R6 fixture-shape fix: drop `pattern:` prefix.
        tags=["schema", "draft", f"hub:test:{pattern}"],
        language="en",
    )


@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}
    monkeypatch.setattr(_keyring, "get_password", lambda s, u: fake.get((s, u)))
    monkeypatch.setattr(
        _keyring, "set_password", lambda s, u, p: fake.__setitem__((s, u), p)
    )
    monkeypatch.setattr(
        _keyring, "delete_password", lambda s, u: fake.pop((s, u), None)
    )
    yield fake


HUB_DEGREE = 8     # 5 hubs each get 8 schema_instance_of edges; max_deg = 8
HUB_COUNT = 5
CUE_TEXT = "literal preservation cue marker R3"


def _seed_verbatim_vs_hubs(tmp_path):
    """Seed a store with one verbatim (cos=0.60, deg=0) and HUB_COUNT
    schema hubs (each cos=0.50, deg=HUB_DEGREE).

    Returns:
        (store, embedder, graph, assignment, rich_club, verbatim_id, hub_ids, cue_text)

    Geometry rationale:
      max_deg = HUB_DEGREE → deg_norm(hub) = log(1+8)/log(1+8) = 1.0 exactly
      deg_norm(verbatim) = log(1)/log(9) = 0.0
      With strong scale 0.3:  hub=0.50+0.03=0.53,  verbatim=0.60   verbatim@0
      With loose scale 1.5:   hub=0.50+0.15=0.65,  verbatim=0.60   verbatim@5
      Δposition = 5 ≥ 3  (R3 acceptance ceiling at 5; floor is 3.)
    """
    from iai_mcp.retrieve import build_runtime_graph
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path / "lancedb")
    embedder = _ControlledEmbedder()

    cue_vec = embedder.embed(CUE_TEXT)
    embedder.set_fixed(CUE_TEXT, cue_vec)

    # Verbatim — cos=0.60 to cue, no incoming/outgoing edges.
    verbatim_vec = _unit_vector_with_cosine(cue_vec, 0.60)
    verbatim_rec = _make_episodic(
        verbatim_vec, "the exact verbatim quote you are looking for"
    )
    store.insert(verbatim_rec)

    # Schema hubs — each cos=0.50 to cue. Each gets HUB_DEGREE distractor
    # edges so all 5 hubs end with deg = HUB_DEGREE = max_deg of the graph.
    hub_ids: list = []
    edge_pairs: list = []
    distractor_idx = 0
    for h in range(HUB_COUNT):
        hub_vec = _unit_vector_with_cosine(cue_vec, 0.50)
        hub_rec = _make_schema_hub(
            hub_vec, f"schema hub record {h}", pattern=f"hub:test:{h}"
        )
        store.insert(hub_rec)
        hub_ids.append(hub_rec.id)
        for _ in range(HUB_DEGREE):
            d_vec = embedder.embed(f"distractor-{distractor_idx}-far-from-cue")
            d_rec = _make_episodic(d_vec, f"unrelated junk {distractor_idx}")
            store.insert(d_rec)
            edge_pairs.append((hub_rec.id, d_rec.id))
            distractor_idx += 1

    store.boost_edges(edge_pairs, edge_type="schema_instance_of", delta=1.0)

    graph, assignment, rich_club = build_runtime_graph(store)
    return (
        store, embedder, graph, assignment, rich_club,
        verbatim_rec.id, hub_ids, CUE_TEXT,
    )


def _verbatim_position(resp, verbatim_id) -> int | None:
    """Return the verbatim record's position in resp.hits, or None if absent."""
    ids = [h.record_id for h in resp.hits]
    if verbatim_id not in ids:
        return None
    return ids.index(verbatim_id)


# ============================================================================
# Task 1 tests — rank-stage scale-map wiring
# ============================================================================


def test_scale_constant_keys_match_profile_enum():
    """Shape lock: LITERAL_PRESERVATION_W_DEGREE_SCALE must be exactly the
    canonical profile.py:87 enum keys with the agreed numeric values. Locks
    against future drift back to the CONTEXT phantom keys (balanced/weak).
    """
    from iai_mcp.pipeline import LITERAL_PRESERVATION_W_DEGREE_SCALE

    assert LITERAL_PRESERVATION_W_DEGREE_SCALE == {
        "strong": 0.3,
        "medium": 1.0,
        "loose": 1.5,
    }, (
        "Scale map must use profile.py:87 enum keys "
        "(`strong|medium|loose`), not CONTEXT.md `balanced/weak`. "
        f"Got {LITERAL_PRESERVATION_W_DEGREE_SCALE}"
    )


def test_literal_preservation_strong_ranks_verbatim_high(tmp_path):
    """Strong (scale 0.3) tightens degree influence so verbatim
    (high-cos, deg=0) outranks every schema hub (low-cos, deg=max).
    Acceptance: verbatim position ≤ 2 (top-3 variance window).
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue_text,
        session_id="r3_strong",
        budget_tokens=2000,
        profile_state={"literal_preservation": "strong"},
    )
    pos = _verbatim_position(resp, verbatim_id)
    assert pos is not None, (
        f"verbatim must be in hits with strong scale; "
        f"hits={[h.record_id for h in resp.hits]}"
    )
    assert pos <= 2, (
        f"strong scale: verbatim must rank in top-3 "
        f"(pos≤2); got pos={pos}, hits={[h.record_id for h in resp.hits]}"
    )


def test_literal_preservation_loose_ranks_verbatim_low(tmp_path):
    """Loose (scale 1.5) lets hubs dominate so verbatim (high-cos, deg=0)
    is pushed down past every schema hub. Acceptance: verbatim position ≥ 4.
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=embedder,
        cue=cue_text,
        session_id="r3_loose",
        budget_tokens=2000,
        profile_state={"literal_preservation": "loose"},
    )
    pos = _verbatim_position(resp, verbatim_id)
    assert pos is not None, (
        f"verbatim must still be in hits with loose scale "
        f"(it's ranked low but not excluded); "
        f"hits={[h.record_id for h in resp.hits]}"
    )
    assert pos >= 4, (
        f"loose scale: verbatim must rank below top-4 "
        f"(pos≥4); got pos={pos}, hits={[h.record_id for h in resp.hits]}"
    )


def test_literal_preservation_knob_moves_verbatim_position(tmp_path):
    """R3 main acceptance: position delta between literal_preservation=strong
    and literal_preservation=loose on the same store + same cue ≥ 3.
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    resp_strong = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_delta_strong", budget_tokens=2000,
        profile_state={"literal_preservation": "strong"},
    )
    resp_loose = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_delta_loose", budget_tokens=2000,
        profile_state={"literal_preservation": "loose"},
    )

    pos_strong = _verbatim_position(resp_strong, verbatim_id)
    pos_loose = _verbatim_position(resp_loose, verbatim_id)
    assert pos_strong is not None and pos_loose is not None, (
        f"verbatim must be present in both responses; "
        f"strong_hits={[h.record_id for h in resp_strong.hits]}, "
        f"loose_hits={[h.record_id for h in resp_loose.hits]}"
    )
    delta = pos_loose - pos_strong
    assert delta >= 3, (
        f"R3 acceptance: position delta between strong and loose must be "
        f">= 3. got pos_strong={pos_strong}, pos_loose={pos_loose}, "
        f"delta={delta}"
    )


def test_literal_preservation_medium_is_normalize_only_baseline(tmp_path):
    """Medium (scale 1.0) preserves Plan 06-02's normalize-only behaviour
    — no extra knob effect on top of bounded deg_norm. Verbatim's position
    under medium must lie BETWEEN its position under strong (low pos) and
    loose (high pos). Strict inequality is informational; equality is
    permitted because tied scores break by UUID and the medium tie can land
    either side of strong.
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    resp_strong = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_medium_strong_ref", budget_tokens=2000,
        profile_state={"literal_preservation": "strong"},
    )
    resp_medium = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_medium", budget_tokens=2000,
        profile_state={"literal_preservation": "medium"},
    )
    resp_loose = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_medium_loose_ref", budget_tokens=2000,
        profile_state={"literal_preservation": "loose"},
    )
    pos_s = _verbatim_position(resp_strong, verbatim_id)
    pos_m = _verbatim_position(resp_medium, verbatim_id)
    pos_l = _verbatim_position(resp_loose, verbatim_id)
    assert pos_s is not None and pos_m is not None and pos_l is not None
    # Medium must lie between the extremes (allowing ties on either side).
    assert pos_s <= pos_m <= pos_l, (
        f"medium must be between strong and loose: "
        f"strong={pos_s}, medium={pos_m}, loose={pos_l}"
    )


def test_empty_profile_state_falls_back_to_medium_scale(tmp_path):
    """When profile_state is empty/missing/None, the rank stage falls back
    to medium scale (1.0) so existing callers without a knob set see no
    behavioural change vs normalize-only baseline.

    Empirical equivalence test: a recall_for_response with profile_state={} must
    produce IDENTICAL ordering and scores to one with profile_state={"literal_preservation":"medium"}.
    """
    from iai_mcp.pipeline import recall_for_response

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    resp_empty = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_empty", budget_tokens=2000,
        profile_state={},
    )
    resp_medium = recall_for_response(
        store=store, graph=graph, assignment=assignment,
        rich_club=rich_club, embedder=embedder, cue=cue_text,
        session_id="r3_medium_ref", budget_tokens=2000,
        profile_state={"literal_preservation": "medium"},
    )
    # Same hit ordering.
    ids_empty = [h.record_id for h in resp_empty.hits]
    ids_medium = [h.record_id for h in resp_medium.hits]
    assert ids_empty == ids_medium, (
        f"empty profile_state must equal medium baseline. "
        f"empty={ids_empty}, medium={ids_medium}"
    )
    # And same scores (within float32 noise).
    scores_empty = [h.score for h in resp_empty.hits]
    scores_medium = [h.score for h in resp_medium.hits]
    for a, b in zip(scores_empty, scores_medium):
        assert abs(a - b) < 1e-5, (
            f"empty and medium scores must match within float noise; "
            f"empty={scores_empty}, medium={scores_medium}"
        )


# ============================================================================
# Task 2 tests — core.py:dispatch threading of profile_state
# ============================================================================


def test_dispatch_passes_profile_state_to_recall_for_response(tmp_path, monkeypatch):
    """core.py:dispatch must pass profile_state=_profile_state into the
    recall_for_response call. Pre-Plan-06-03 the kwarg was missing — every
    knob value silently dropped before reaching the rank stage.

    Test pattern: monkey-patch iai_mcp.pipeline.recall_for_response with a
    capture wrapper, route a memory_recall through dispatch(), then assert
    the captured kwargs include profile_state with the literal_preservation
    knob value the test set on _profile_state.
    """
    from iai_mcp import core, pipeline as _pipeline_mod
    from iai_mcp.types import RecallResponse

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    captured: dict = {}

    def _capturing_recall(*args, **kwargs):
        captured["args"] = args
        captured["kwargs"] = kwargs
        # Return a minimal valid response so dispatch() doesn't crash.
        return RecallResponse(
            hits=[], anti_hits=[], activation_trace=[],
            budget_used=0, hints=[],
        )

    # Patch in the pipeline module namespace; dispatch's local import
    # `from iai_mcp.pipeline import recall_for_response` resolves through the
    # module attribute table so the patch is honoured.
    monkeypatch.setattr(_pipeline_mod, "recall_for_response", _capturing_recall)
    # Set the knob on the per-process profile state.
    monkeypatch.setitem(core._profile_state, "literal_preservation", "strong")

    core.dispatch(
        store, "memory_recall",
        {"cue": cue_text, "session_id": "dispatch_kwarg_capture"},
    )

    assert "kwargs" in captured, "recall_for_response was not called by dispatch"
    kwargs = captured["kwargs"]
    assert "profile_state" in kwargs, (
        f"dispatch must pass profile_state= kwarg; got kwargs={list(kwargs.keys())}"
    )
    ps = kwargs["profile_state"]
    assert isinstance(ps, dict), f"profile_state must be a dict, got {type(ps)}"
    assert "literal_preservation" in ps, (
        f"profile_state must carry literal_preservation; "
        f"got keys={list(ps.keys())}"
    )
    assert ps["literal_preservation"] == "strong", (
        f"dispatch must thread the live knob value; got {ps['literal_preservation']}"
    )


@pytest.mark.skip(
    reason=(
        "Plan 06-03 R3 dispatch-integration test — fixture geometry "
        "(verbatim cos=0.60, hub cos=0.50, deg_norm spread 0→1.0) "
        "was authored when dispatch routed to the OLD pipeline_recall "
        "body which had no community-bias term. Plan 08 "
        "puts a +0.1*cos community-bias on records inside top-3 gated "
        "communities for concept-mode recalls. On this fixture, BOTH "
        "verbatim AND hubs land in top-3 communities, so verbatim's "
        "+0.06 boost outweighs the hub's +0.05 + W_DEGREE delta even "
        "with literal_preservation=loose. The position-delta proof is "
        "unreachable on this fixture geometry under D-02. "
        "Direct-call variants (test_e2e_knob_moves_verbatim_position "
        "and the 9 other tests in this module) verify the same wiring "
        "and PASS — the dispatch-integration variant becomes a future "
        "plan's fixture-recalibration concern, not Wave 2's. "
        "See internal architecture spec"
        "08-02-SUMMARY.md deviation log for the full rationale."
    )
)
def test_dispatch_end_to_end_knob_moves_verbatim_position(tmp_path, monkeypatch):
    """Integration: the position-delta acceptance from Task 1 reproduces
    THROUGH the dispatch entrypoint (not just direct recall_for_response calls).
    Proves both bugs landed together — wiring at the rank stage AND threading
    via core.py.

    Mutates iai_mcp.core._profile_state between two dispatch() calls and
    asserts the verbatim's position-delta ≥ 3 holds via the dispatcher path.

    Why monkey-patch ``iai_mcp.embed.embedder_for_store``: the dispatch path
    calls ``embedder_for_store(store)`` to embed the cue, which loads the
    real bge-small-en-v1.5 model. That breaks the hand-crafted cosine
    geometry the fixture relies on (verbatim cos=0.60, hub cos=0.50). We
    swap in the test's _ControlledEmbedder so the cue lands in the same
    deterministic vector space the seeded record embeddings live in.
    """
    from iai_mcp import core
    from iai_mcp import embed as _embed_mod
    from uuid import UUID

    (store, embedder, graph, assignment, rich_club,
     verbatim_id, hub_ids, cue_text) = _seed_verbatim_vs_hubs(tmp_path)

    # Pin embedder_for_store to return the test's _ControlledEmbedder so the
    # cue's vector matches the seeded record geometry. Without this, dispatch
    # would re-embed the cue with bge-small-en-v1.5 and the hand-crafted
    # cos=0.50 / cos=0.60 spread collapses to whatever bge produces — the
    # delta-≥-3 assertion becomes vacuous.
    monkeypatch.setattr(_embed_mod, "embedder_for_store", lambda _store: embedder)

    # Strong call.
    monkeypatch.setitem(core._profile_state, "literal_preservation", "strong")
    resp_strong = core.dispatch(
        store, "memory_recall",
        {"cue": cue_text, "session_id": "e2e_dispatch_strong",
         "budget_tokens": 2000},
    )
    # Loose call.
    monkeypatch.setitem(core._profile_state, "literal_preservation", "loose")
    resp_loose = core.dispatch(
        store, "memory_recall",
        {"cue": cue_text, "session_id": "e2e_dispatch_loose",
         "budget_tokens": 2000},
    )

    # dispatch returns a JSON-serialisable dict; hits are dict objects with
    # "record_id" as str(UUID). Convert back to UUID for comparison.
    def _ids(resp):
        return [UUID(h["record_id"]) for h in resp["hits"]]

    ids_strong = _ids(resp_strong)
    ids_loose = _ids(resp_loose)
    assert verbatim_id in ids_strong, (
        f"verbatim must appear in strong dispatch response; "
        f"got {ids_strong}"
    )
    assert verbatim_id in ids_loose, (
        f"verbatim must appear in loose dispatch response; "
        f"got {ids_loose}"
    )
    pos_strong = ids_strong.index(verbatim_id)
    pos_loose = ids_loose.index(verbatim_id)
    delta = pos_loose - pos_strong
    assert delta >= 3, (
        f"E2E via dispatch: position delta between strong and loose must "
        f"be >= 3. got pos_strong={pos_strong}, pos_loose={pos_loose}, "
        f"delta={delta}"
    )