# iai-mcp-opencode/tests/test_schema_induction_streaming.py

"""Plan 07.7-04 D-26-C — schema.py induce_schemas_tier0 + persist_schema migrate
to ``store.iter_record_columns(...)`` projection.

CONTEXT.md amendment (added 2026-04-29 mid-execution): the original Plan 04
W4 scope (sleep.py invariant + comment marker) is REPLACED-AND-EXTENDED by
migrating two `all_records()` callers in `schema.py` so that the W4 ≤1
all_records() invariant on `run_heavy_consolidation` becomes achievable.

Pre-D-26 architecture:

    run_heavy_consolidation
      ├── all_records() at sleep.py:513      (records_by_id — kept by W4)
      ├── _tier0_schema_surfacing            (W3 — projection-only via Plan 03)
      └── induce_schemas_tier1
            └── induce_schemas_tier0
                  ├── all_records() at schema.py:89        ← D-26-A target
                  └── (downstream) persist_schema
                        └── all_records() at schema.py:267 ← D-26-B target

Total: 3 all_records() calls per heavy invocation (when auto-status candidates
fire).

Post-D-26 architecture:

    run_heavy_consolidation
      ├── all_records() at sleep.py:513      (records_by_id — kept by W4)
      ├── _tier0_schema_surfacing            (W3 — projection-only via Plan 03)
      └── induce_schemas_tier1
            └── induce_schemas_tier0
                  ├── iter_record_columns(["id", "tags_json"]) ← D-26-A
                  └── persist_schema
                        └── iter_record_columns(["id", "tier", "tags_json"])
                              ← D-26-B (early-exit via break on first match)

Total: 1 all_records() call per heavy invocation. W4 invariant becomes
achievable; the W4 invariant test in tests/test_sleep_consolidation_streaming.py
asserts ``count_all.call_count <= 1``.

Covered contracts (D-26-C):

  D-26-A — induce_schemas_tier0 migration:
    1. Calls iter_record_columns, NOT all_records (spy via monkeypatch)
    2. _decrypt_for_record fires zero times (proof of zero-AES-GCM W3-style)
    3. SchemaCandidate output is byte-identical to pre-W4-ext implementation
       on a deterministic synthetic store (same patterns, same evidence_count,
       same confidence, same status)

  D-26-B — persist_schema migration:
    4. Calls iter_record_columns, NOT all_records (spy via monkeypatch)
    5. Early-exit via break on first matching pattern row works (the keeper
       scan must NOT iterate every record after a hit)
    6. Correct schema_id returned when keeper is mid-stream (the keeper's
       UUID is preserved across the iter_record_columns→str→UUID round-trip)

  Cross-cutting:
    7. existing_keeper_id remains a UUID (not a string from row["id"])
    8. The pattern_tag check is preserved byte-for-byte: tier == "semantic"
       AND f"pattern:{candidate.pattern}" in tags

Phase 07.6 plan-checker B-1 lesson: every test uses a real ``MemoryRecord``
dataclass via ``_rec()`` — never a plain dict against attribute-access code.
"""
from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock
from uuid import UUID, uuid4

import pytest

from iai_mcp.schema import (
    SchemaCandidate,
    induce_schemas_tier0,
    persist_schema,
)
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord


# --------------------------------------------------------------------------- fixtures


@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Back the ``keyring`` module with an in-memory dict for every test.

    Same approach as tests/test_store_iter_records.py: with the three
    password functions patched, AES-256-GCM key generation never reaches the
    OS keychain inside CI.

    Yields:
        The backing dict, keyed by ``(service, username)``.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(service, user):  # noqa: ANN001
        return vault.get((service, user))

    def _set(service, user, secret):  # noqa: ANN001
        vault[(service, user)] = secret

    def _delete(service, user):  # noqa: ANN001
        # Missing entries are tolerated; the removed value (or None) is returned.
        return vault.pop((service, user), None)

    stand_ins = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, replacement in stand_ins:
        monkeypatch.setattr(_keyring, attr_name, replacement)

    yield vault


@pytest.fixture(autouse=True)
def _patch_embedder(monkeypatch: pytest.MonkeyPatch):
    """Swap ``iai_mcp.embed.Embedder`` for a constant-vector stub.

    persist_schema's insert path embeds the schema summary; the stub keeps
    bge-m3 from being loaded, which would otherwise cost each test roughly
    5 seconds.
    """
    from iai_mcp import embed as embed_mod

    def _unit_vector() -> list[float]:
        # 1.0 in the first slot, zeros everywhere else.
        return [1.0, *([0.0] * (EMBED_DIM - 1))]

    class _FakeEmbedder:
        DIM = EMBED_DIM
        DEFAULT_DIM = EMBED_DIM
        DEFAULT_MODEL_KEY = "fake"

        def __init__(self, *args, **kwargs):  # noqa: ANN001
            # Constructor arguments are accepted and ignored.
            self.DIM = EMBED_DIM

        def embed(self, text: str) -> list[float]:
            return _unit_vector()

        def embed_batch(self, texts):  # noqa: ANN001
            vectors = []
            for item in texts:
                vectors.append(self.embed(item))
            return vectors

    monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)
    yield


def _rec(
    *,
    text: str = "t",
    tags: list[str] | None = None,
    tier: str = "episodic",
    detail_level: int = 2,
    language: str = "en",
) -> MemoryRecord:
    """Build a real ``MemoryRecord`` for tests (NEVER a plain dict — plan-checker B-1).

    Each call gets a fresh ``uuid4`` id, a UTC timestamp shared by
    ``created_at`` and ``updated_at``, and an embedding with 1.0 in the first
    slot and zeros elsewhere. ``never_decay`` is switched on when
    ``detail_level`` is 3 or higher. ``tags`` is copied, so callers may reuse
    the list they pass in.
    """
    stamp = datetime.now(timezone.utc)
    unit_embedding = [1.0, *([0.0] * (EMBED_DIM - 1))]
    tag_copy = list(tags) if tags else []
    is_high_detail = detail_level >= 3

    return MemoryRecord(
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=detail_level,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=is_high_detail,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=tag_copy,
        language=language,
    )


@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a ``MemoryStore`` rooted at ``tmp_path / "lancedb"``.

    pytest hands every test its own ``tmp_path``, so each test gets a fresh
    store with no cross-test bleed.
    """
    db_dir = tmp_path / "lancedb"
    return MemoryStore(path=db_dir)


# --------------------------------------------------------------------------- D-26-A: induce_schemas_tier0


def test_induce_schemas_tier0_uses_iter_record_columns_not_all_records(
    store: MemoryStore, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-26-A architecture flip: ``induce_schemas_tier0`` reads through
    ``iter_record_columns(["id", "tags_json"], ...)`` and never touches
    ``all_records()``.

    RED before D-26-A: the function called ``store.all_records()`` at
    schema.py:89, so the ``all_records`` spy fired once and the
    ``iter_record_columns`` spy never fired.

    GREEN after D-26-A: the ``iter_record_columns`` spy fires and the
    ``all_records`` spy stays at zero calls.
    """
    # Five records sharing one tag pair (above CLUSTER_MIN_SIZE=3).
    for idx in range(5):
        store.insert(_rec(text=f"r{idx}", tags=["meeting", "notes"]))

    # Build both spies around the real bound methods first, then install
    # them, so each spy wraps the unpatched implementation.
    watched = ("all_records", "iter_record_columns")
    spies = {name: MagicMock(wraps=getattr(store, name)) for name in watched}
    for name in watched:
        monkeypatch.setattr(store, name, spies[name])

    induce_schemas_tier0(store)

    all_records_calls = spies["all_records"].call_count
    iter_columns_calls = spies["iter_record_columns"].call_count

    assert all_records_calls == 0, (
        f"induce_schemas_tier0 must NOT call store.all_records() post-D-26-A; "
        f"got {all_records_calls} call(s)"
    )
    assert iter_columns_calls >= 1, (
        f"induce_schemas_tier0 must call store.iter_record_columns() at least "
        f"once post-D-26-A; got {iter_columns_calls} call(s)"
    )


def test_induce_schemas_tier0_zero_decrypt_calls(
    store: MemoryStore, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-26-A zero-decrypt contract: the migrated path never invokes
    ``_decrypt_for_record``.

    The projection is ``["id", "tags_json"]`` and neither column is
    encrypted (``id`` is a plain string UUID; ``tags_json`` is a plain JSON
    string per store.py:273), so the W5 cipher cache is bypassed entirely —
    the same win W3 delivered for ``_tier0_schema_surfacing``.

    RED before D-26-A: ``store.all_records()`` ran every row through
    ``_from_row``, which decrypts literal_surface, provenance_json and
    profile_modulation_gain_json — up to 15 calls on this 5-record store.

    GREEN after D-26-A: zero calls.
    """
    for idx in range(5):
        store.insert(_rec(text=f"r{idx}", tags=["meeting", "notes"]))

    real_decrypt = store._decrypt_for_record
    decrypt_probe = MagicMock(wraps=real_decrypt)
    monkeypatch.setattr(store, "_decrypt_for_record", decrypt_probe)

    induce_schemas_tier0(store)

    observed_calls = decrypt_probe.call_count
    assert observed_calls == 0, (
        f"induce_schemas_tier0 must NOT trigger ANY _decrypt_for_record "
        f"calls post-D-26-A; got {observed_calls} call(s)"
    )


def test_induce_schemas_tier0_byte_identical_to_pre_d26_implementation(
    store: MemoryStore,
) -> None:
    """D-26-C contract: the migrated function yields the same SchemaCandidate
    set as the pre-D-26-A algorithm on a deterministic synthetic store.

    The expected output is recomputed inline with the pre-D-26-A recipe
    (``store.all_records()`` + ``_tag_cooccurrence`` + the status threshold
    ladder) and compared order-independently (sorted by pattern) against
    ``induce_schemas_tier0(store)``.

    Fixture (13 records):
      - 9 records tagged ["meeting", "notes"]   → count=9, confidence=0.9
      - 4 records tagged ["report", "deadline"] → count=4, confidence=0.4

    Threshold notes (values as recorded when this test was written:
    AUTO_INDUCT_COOCCURRENCE=5, AUTO_INDUCT_CONFIDENCE=0.85,
    USER_APPROVAL_COOCCURRENCE=3, USER_APPROVAL_CONFIDENCE=0.65):
      - count=9 clears both auto floors → status="auto".
      - count=4 sits in the user-approval count window (3 <= 4 < 5), but
        confidence 0.4 < 0.65 → skipped.
      - The user-approval branch cannot fire with those values: confidence
        is count/10, so confidence >= 0.65 needs count >= 7, which
        contradicts count < 5. Only the auto path emits a candidate here.

    The expectation itself is computed from the imported constants, so it
    follows the thresholds if they change; only the final "at least one
    auto candidate" sanity check relies on the values above.
    """
    # 9 records with the same tag pair → count=9, confidence=0.9, status="auto"
    for i in range(9):
        store.insert(_rec(text=f"auto-{i}", tags=["meeting", "notes"]))
    # 4 records with a different tag pair — below the auto count floor and
    # below the user-approval confidence floor (0.4 < 0.65), so this pair
    # contributes nothing to the candidate list.
    for i in range(4):
        store.insert(_rec(text=f"low-{i}", tags=["report", "deadline"]))

    # Compute expected via the pre-D-26-A algorithm inline. We re-implement
    # the contract directly so the test does not depend on the prior
    # implementation surviving the migration unchanged.
    from iai_mcp.schema import (
        AUTO_INDUCT_CONFIDENCE,
        AUTO_INDUCT_COOCCURRENCE,
        MAX_EVIDENCE_PER_SCHEMA,
        USER_APPROVAL_CONFIDENCE,
        USER_APPROVAL_COOCCURRENCE,
        _tag_cooccurrence,
    )

    expected_records = store.all_records()
    pair_counts = _tag_cooccurrence(expected_records)
    expected: list[dict] = []
    for pair, evidence in pair_counts.items():
        count = len(evidence)
        confidence = min(1.0, count / 10.0)
        if count >= AUTO_INDUCT_COOCCURRENCE and confidence >= AUTO_INDUCT_CONFIDENCE:
            status = "auto"
        elif (
            USER_APPROVAL_COOCCURRENCE <= count < AUTO_INDUCT_COOCCURRENCE
            and confidence >= USER_APPROVAL_CONFIDENCE
        ):
            status = "pending_user_approval"
        else:
            continue
        expected.append({
            "pattern": f"tags:{'+'.join(sorted(pair))}",
            "confidence": confidence,
            "evidence_count": count,
            "status": status,
            # NOTE(review): if MAX_EVIDENCE_PER_SCHEMA were smaller than a
            # pair's evidence count, the truncated sets could differ whenever
            # the two scans return rows in different orders — confirm the cap
            # stays at or above 9 for this fixture.
            "evidence_ids_set": set(evidence[:MAX_EVIDENCE_PER_SCHEMA]),
        })

    actual = induce_schemas_tier0(store)

    expected_sorted = sorted(expected, key=lambda d: d["pattern"])
    actual_sorted = sorted(actual, key=lambda c: c.pattern)

    assert len(actual_sorted) == len(expected_sorted), (
        f"candidate count mismatch — expected={len(expected_sorted)} "
        f"actual={len(actual_sorted)}; expected={expected_sorted!r}; "
        f"actual={[(c.pattern, c.evidence_count, c.confidence, c.status) for c in actual_sorted]!r}"
    )
    for e, a in zip(expected_sorted, actual_sorted, strict=True):
        assert a.pattern == e["pattern"]
        assert a.evidence_count == e["evidence_count"]
        assert a.confidence == pytest.approx(e["confidence"])
        assert a.status == e["status"]
        # evidence_ids must round-trip back to the same UUIDs (set equality —
        # iter_record_columns batch order may differ from all_records pandas
        # iter order, but the underlying set must match).
        assert set(a.evidence_ids) == e["evidence_ids_set"]

    # Sanity: at least one auto candidate surfaced (the 9-records pair).
    assert any(c.status == "auto" for c in actual_sorted), (
        f"expected at least one status='auto' candidate on the 9-record "
        f"meeting+notes pair; got {[(c.pattern, c.evidence_count, c.status) for c in actual_sorted]!r}"
    )


def test_induce_schemas_tier0_evidence_ids_are_uuids(
    store: MemoryStore,
) -> None:
    """D-26-A boundary contract: ``SchemaCandidate.evidence_ids`` stays
    ``list[UUID]`` after the migration.

    ``iter_record_columns`` hands back ``id`` as a string (per
    tests/test_store_iter_records.py:250), so the migrated code has to
    convert at the boundary. Without that conversion, downstream callers
    such as ``store.boost_edges([(ev_id, schema_id) for ev_id in
    evidence_ids])`` would break.
    """
    inserted_ids: set[UUID] = set()
    for idx in range(9):
        record = _rec(text=f"r{idx}", tags=["meeting", "notes"])
        store.insert(record)
        inserted_ids.add(record.id)

    auto_candidates = [
        cand for cand in induce_schemas_tier0(store) if cand.status == "auto"
    ]
    assert len(auto_candidates) >= 1, "expected at least one auto candidate"

    for cand in auto_candidates:
        for ev_id in cand.evidence_ids:
            assert isinstance(ev_id, UUID), (
                f"evidence_ids must be list[UUID]; got {type(ev_id).__name__} "
                f"for {ev_id!r}"
            )
        # Subset check — every evidence id must trace back to a record this
        # test inserted.
        assert set(cand.evidence_ids).issubset(inserted_ids)


# --------------------------------------------------------------------------- D-26-B: persist_schema


def test_persist_schema_uses_iter_record_columns_not_all_records_for_keeper_scan(
    store: MemoryStore, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-26-B architecture flip: persist_schema's keeper-pattern scan goes
    through ``iter_record_columns(["id", "tier", "tags_json"], ...)`` and
    not through ``store.all_records()``.

    Fixture: a store holding only three evidence records — no existing
    keeper — so the no-keeper-found branch runs; that branch must still
    perform the scan.

    RED before D-26-B: ``persist_schema`` called ``store.all_records()`` at
    schema.py:267, so the ``all_records`` spy fired once.

    GREEN after D-26-B: the ``iter_record_columns`` spy fires (at minimum
    with the ``["id", "tier", "tags_json"]`` projection) and the
    ``all_records`` spy stays at zero.

    Note: the fallback insert path at schema.py:371 calls
    ``store.insert(...)``, which internally uses
    ``boost_edges``/``merge_insert`` and may touch other tables, but does
    NOT call ``store.all_records()`` (verified by reading store.py). The
    ``all_records`` spy therefore reflects only the keeper-scan path.
    """
    # Three evidence records — the minimum CLUSTER_MIN_SIZE.
    evidence = [_rec(text=f"ev{n}", tags=["meeting", "notes"]) for n in range(3)]
    for record in evidence:
        store.insert(record)
    evidence_ids = [record.id for record in evidence]

    # Wrap the real bound methods before installing either spy.
    all_records_spy = MagicMock(wraps=store.all_records)
    iter_columns_spy = MagicMock(wraps=store.iter_record_columns)
    monkeypatch.setattr(store, "all_records", all_records_spy)
    monkeypatch.setattr(store, "iter_record_columns", iter_columns_spy)

    candidate = SchemaCandidate(
        pattern="tags:meeting+notes",
        confidence=0.9,
        evidence_count=3,
        evidence_ids=evidence_ids,
        status="auto",
    )
    persist_schema(store, candidate)

    full_scan_calls = all_records_spy.call_count
    projection_calls = iter_columns_spy.call_count

    assert full_scan_calls == 0, (
        f"persist_schema must NOT call store.all_records() post-D-26-B; "
        f"got {full_scan_calls} call(s)"
    )
    assert projection_calls >= 1, (
        f"persist_schema must call store.iter_record_columns() at least once "
        f"post-D-26-B (keeper scan); got {projection_calls} call(s)"
    )


def test_persist_schema_early_exit_on_first_match(
    store: MemoryStore, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-26-B: the keeper scan must break on the FIRST matching pattern row,
    matching the existing schema.py:268-272 ``break`` semantics.

    Fixture: 50 semantic-tier records, ALL carrying the keeper pattern tag,
    plus 3 evidence records. The migrated code must stop iterating after the
    first match — proven by counting how many rows ``iter_record_columns``
    yields while ``persist_schema`` runs.

    Strategy: wrap ``iter_record_columns`` with a row counter immediately
    before the ``persist_schema`` call, so only rows pulled during that call
    are counted (fixture inserts happen before the wrapper is installed).
    """
    keeper_total = 50
    # Generous ceiling: a scan that breaks on the first hit should yield far
    # fewer rows than this; a scan without `break` would yield all
    # `keeper_total` keeper rows.
    max_rows_allowed = keeper_total // 2

    # Insert the keeper rows, all carrying the same pattern tag.
    pattern = "tags:meeting+notes"
    pattern_tag = f"pattern:{pattern}"
    keeper_ids: list[UUID] = []
    for i in range(keeper_total):
        r = _rec(
            text=f"schema-{i}",
            tags=["schema", "auto", pattern_tag],
            tier="semantic",
            detail_level=3,
        )
        store.insert(r)
        keeper_ids.append(r.id)

    # Seed evidence records BEFORE installing the counter so nothing done on
    # the insert side can inflate the count.
    ev_recs = [_rec(text=f"ev{i}", tags=["meeting", "notes"]) for i in range(3)]
    for r in ev_recs:
        store.insert(r)

    cand = SchemaCandidate(
        pattern=pattern,
        confidence=0.9,
        evidence_count=3,
        evidence_ids=[r.id for r in ev_recs],
        status="auto",
    )

    # Transparent counting wrapper: forwards whatever positional/keyword
    # arguments the caller supplies and counts each row handed out.
    real_iter = store.iter_record_columns
    yielded = {"count": 0}

    def counting_iter(*args, **kwargs):  # noqa: ANN002, ANN003
        for row in real_iter(*args, **kwargs):
            yielded["count"] += 1
            yield row

    monkeypatch.setattr(store, "iter_record_columns", counting_iter)

    schema_id = persist_schema(store, cand)

    # Returned id must be one of the existing keepers (the first matching row).
    assert schema_id in keeper_ids, (
        f"persist_schema must return an existing keeper id when a match exists; "
        f"got {schema_id} not in {keeper_ids[:3]}..."
    )

    # Guard against a vacuous pass: if the scan never went through
    # iter_record_columns, the counter stays at 0 and the upper bound below
    # would be trivially satisfied.
    assert yielded["count"] >= 1, (
        "persist_schema must scan via store.iter_record_columns(); the "
        "counting wrapper yielded 0 rows"
    )

    # Early-exit invariant: the consuming code stops pulling rows from the
    # generator once it breaks, so the counter stays well under the number
    # of keeper rows.
    assert yielded["count"] <= max_rows_allowed, (
        f"persist_schema must early-exit on first match; iterator yielded "
        f"{yielded['count']} rows on a {keeper_total}-keeper-row store "
        f"(expected break after the first match — at most {max_rows_allowed})"
    )


def test_persist_schema_returns_correct_id_when_keeper_is_mid_stream(
    store: MemoryStore,
) -> None:
    """D-26-B: when the keeper is not the first row inserted, the returned
    UUID must still be the keeper's id — not a raw string from row["id"]
    and not the id of some other row.

    Fixture: 5 semantic-tier records, exactly ONE of which carries the
    matching pattern tag (inserted third). The migrated code must:
      1. Pass over non-matching rows without misfiring.
      2. Capture the matching row's id (with str→UUID conversion).
      3. Break out of the loop.
      4. Return that captured UUID.
    """
    pattern = "tags:meeting+notes"

    def _insert_schema_row(text: str, tag: str) -> MemoryRecord:
        # Create and insert in one step so insertion order follows call order.
        row = _rec(
            text=text,
            tags=["schema", "auto", tag],
            tier="semantic",
            detail_level=3,
        )
        store.insert(row)
        return row

    # Two non-matching rows, then the keeper, then two more non-matching rows.
    for n in range(2):
        _insert_schema_row(f"unrelated-{n}", "pattern:other")
    keeper_id = _insert_schema_row("the-keeper", f"pattern:{pattern}").id
    for n in range(2):
        _insert_schema_row(f"trailing-{n}", "pattern:something-else")

    # Evidence records referenced by the candidate.
    evidence = [_rec(text=f"ev{n}", tags=["meeting", "notes"]) for n in range(3)]
    for record in evidence:
        store.insert(record)

    candidate = SchemaCandidate(
        pattern=pattern,
        confidence=0.9,
        evidence_count=3,
        evidence_ids=[record.id for record in evidence],
        status="auto",
    )
    returned_id = persist_schema(store, candidate)

    assert returned_id == keeper_id, (
        f"persist_schema must return the matching keeper's UUID; "
        f"got {returned_id} expected {keeper_id}"
    )
    assert isinstance(returned_id, UUID), (
        f"persist_schema must return a UUID, not a string from row['id']; "
        f"got {type(returned_id).__name__}"
    )


def test_persist_schema_falls_through_to_insert_when_no_keeper(
    store: MemoryStore,
) -> None:
    """D-26-B byte-identical contract: with no existing schema carrying the
    pattern tag, persist_schema falls through to the original insert path
    (line 371 ``store.insert(schema_rec)``) and returns a NEW UUID — never
    the id of an existing, non-matching record.

    Fixture: 5 semantic-tier records, each with a DIFFERENT pattern tag, none
    matching the candidate; the function must insert a new schema record.
    """

    def _insert_non_matching(n: int) -> UUID:
        # Semantic-tier row whose pattern tag differs from the candidate's.
        row = _rec(
            text=f"other-{n}",
            tags=["schema", "auto", f"pattern:other-{n}"],
            tier="semantic",
            detail_level=3,
        )
        store.insert(row)
        return row.id

    other_ids: list[UUID] = [_insert_non_matching(n) for n in range(5)]

    # Evidence records referenced by the candidate.
    evidence = [_rec(text=f"ev{n}", tags=["meeting", "notes"]) for n in range(3)]
    for record in evidence:
        store.insert(record)

    cand = SchemaCandidate(
        pattern="tags:meeting+notes",
        confidence=0.9,
        evidence_count=3,
        evidence_ids=[record.id for record in evidence],
        status="auto",
    )
    schema_id = persist_schema(store, cand)

    # The returned id has to be fresh, not one of the non-matching rows.
    assert schema_id not in other_ids, (
        f"persist_schema must insert a new schema when no keeper matches; "
        f"got returned id {schema_id} which equals one of the existing "
        f"non-matching schema ids ({other_ids!r})"
    )

    # The freshly inserted schema record is retrievable and shaped as expected.
    new_rec = store.get(schema_id)
    assert new_rec is not None
    assert new_rec.tier == "semantic"
    assert new_rec.detail_level == 3
    stored_tags = new_rec.tags or []
    assert "schema" in stored_tags
    assert f"pattern:{cand.pattern}" in stored_tags