iai-mcp-opencode/tests/test_store_get_fast.py

"""Plan 05-15 — store.get filter-pushdown fast-path (OPS-10 / M-02).

TDD RED scaffold for exit gate.

Goal: MemoryStore.get(record_id) must use a LanceDB filter-pushdown
point read instead of tbl.to_pandas() full-table-scan. At N=1k the old
path materialised every row + column into a pandas DataFrame and then
filtered in-process; on the prod schema (embedding 384d + encrypted
text + many columns) this ate ~34 ms per call -> ~340 ms per recall
iteration (L0 fast-path + anti-hit lookup = 10 calls/iter).

Invariants preserved:
  - unknown id -> None
  - known id   -> MemoryRecord via _from_row (AES-GCM decrypt fidelity)
  - semantics identical to the full-scan path (byte-identical fields)
"""
from __future__ import annotations

import random
import time
from uuid import UUID, uuid4

import pytest

from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
from tests.test_store import _make


# --------------------------------------------------------------------------- #
# Fixtures                                                                    #
# --------------------------------------------------------------------------- #

def _seed(
    store: MemoryStore, n: int, *, seed: int = 0, compact: bool = False
) -> list[UUID]:
    """Seed `n` records with deterministic embeddings; return ids in order.

    When ``compact=True``, run ``tbl.optimize()`` after the inserts so the
    table is in a single-fragment steady state -- this mirrors what the
    AsyncWriteQueue produces in production and what the
    bench actually measures after warm-up. Without compaction the
    per-insert fragments force every scan (filter-pushdown or not) to
    touch N fragments and perf-fence numbers are dominated by fragment
    open cost rather than the get-path cost we actually want to measure.
    """
    from iai_mcp.store import RECORDS_TABLE

    rnd = random.Random(seed)
    ids: list[UUID] = []
    for i in range(n):
        vec = [rnd.random() for _ in range(EMBED_DIM)]
        r = _make(text=f"fact {i} :: verbatim payload {rnd.random():.6f}", vec=vec)
        store.insert(r)
        ids.append(r.id)
    if compact:
        try:
            tbl = store.db.open_table(RECORDS_TABLE)
            tbl.optimize()
        except Exception:
            # optimize() requires pylance on some platforms; skipping is
            # non-fatal -- the test will just see the pre-compaction
            # numbers, which still exercise the filter-pushdown code path.
            pass
    return ids


# --------------------------------------------------------------------------- #
# G1: unknown id -> None                                                      #
# --------------------------------------------------------------------------- #

def test_get_unknown_id_returns_none(tmp_path):
    """G1: unknown uuid returns None (unchanged semantics)."""
    store = MemoryStore(path=tmp_path)
    _seed(store, n=5)
    phantom = uuid4()
    assert store.get(phantom) is None


# --------------------------------------------------------------------------- #
# G2: known id round-trips + literal_surface decrypts                         #
# --------------------------------------------------------------------------- #

def test_get_known_id_roundtrip_with_decrypt(tmp_path):
    """G2: known id -> MemoryRecord; encrypted literal_surface decrypts."""
    store = MemoryStore(path=tmp_path)
    verbatim = "пусть каждое слово сохранится точно — G2 fidelity"
    r = _make(text=verbatim)
    store.insert(r)
    got = store.get(r.id)
    assert got is not None
    assert got.id == r.id
    assert got.literal_surface == verbatim


# --------------------------------------------------------------------------- #
# G3: no unfiltered to_pandas() on MemoryStore.get                            #
# --------------------------------------------------------------------------- #

def test_get_does_not_call_unfiltered_to_pandas(tmp_path, monkeypatch):
    """G3: store.get must NOT call tbl.to_pandas() without a filter.

    Accept either:
      - tbl.search(...).where(...).to_pandas()
      - tbl.to_lance().to_table(filter=...).to_pandas()
    Reject: bare tbl.to_pandas() with no filter kwarg.
    """
    store = MemoryStore(path=tmp_path)
    _seed(store, n=20)
    target = _seed(store, n=1)[0]

    import lancedb.table as _lt

    # LanceTable is the concrete subclass of Table that open_table returns
    # in lancedb 0.30.x; it overrides to_pandas, so we must patch the
    # concrete class, not the ABC.
    target_cls = _lt.LanceTable
    base_to_pandas = target_cls.to_pandas
    unfiltered_calls: list[dict] = []

    def traced(self, *args, **kwargs):
        # If called on the Table directly (NOT on a search/query builder)
        # and no filter kwarg is passed, record it — that is the old
        # full-scan path.
        if "filter" not in kwargs:
            unfiltered_calls.append({"args": args, "kwargs": dict(kwargs)})
        return base_to_pandas(self, *args, **kwargs)

    monkeypatch.setattr(target_cls, "to_pandas", traced)

    got = store.get(target)
    assert got is not None
    assert got.id == target
    assert not unfiltered_calls, (
        "store.get called Table.to_pandas() without a filter — "
        "full-scan path still in use. Expected filter-pushdown via "
        "tbl.search(...).where(...) or tbl.to_lance().to_table(filter=...)."
    )


# --------------------------------------------------------------------------- #
# G4: perf fence — 100 sequential store.get at N=1k <= 500 ms total           #
# --------------------------------------------------------------------------- #

def test_get_perf_fence_n1k(tmp_path):
    """G4: 100 sequential store.get at N=1k <= 500 ms total (mean <=5 ms, p95 <=10 ms).

    Uses ``compact=True`` in the fixture so the table is a single-fragment
    steady state -- this is what the production AsyncWriteQueue
    produces and what the bench measures after warm-up. Without
    compaction, per-insert fragments dominate every scan and the numbers
    measure fragment open cost rather than the get-path cost the plan
    actually wants to fence.
    """
    store = MemoryStore(path=tmp_path)
    ids = _seed(store, n=1000, compact=True)
    rnd = random.Random(42)
    picks = [rnd.choice(ids) for _ in range(100)]

    # Warmup — pay the first-call LanceDB table-open / index compile once.
    store.get(picks[0])

    samples_ms: list[float] = []
    for rid in picks:
        t0 = time.perf_counter()
        rec = store.get(rid)
        samples_ms.append((time.perf_counter() - t0) * 1000.0)
        assert rec is not None and rec.id == rid

    total = sum(samples_ms)
    mean = total / len(samples_ms)
    samples_ms.sort()
    p95 = samples_ms[int(0.95 * len(samples_ms)) - 1]

    # Perf fence — generous margins so CI noise does not flake.
    assert total <= 500.0, f"N=1k 100x store.get total {total:.1f} ms > 500 ms budget"
    assert mean <= 5.0, f"N=1k store.get mean {mean:.2f} ms > 5 ms/call"
    assert p95 <= 10.0, f"N=1k store.get p95 {p95:.2f} ms > 10 ms/call"


# --------------------------------------------------------------------------- #
# G5: correctness fence vs full-scan baseline                                 #
# --------------------------------------------------------------------------- #

def test_get_matches_full_scan_baseline(tmp_path):
    """G5: for 50 random ids at N=1k, store.get output equals _from_row applied
    to the full-scan row — byte-identical on id, literal_surface, embedding,
    tags, provenance, language, community_id, centrality, stability,
    difficulty, last_reviewed, updated_at.
    """
    store = MemoryStore(path=tmp_path)
    ids = _seed(store, n=1000)
    rnd = random.Random(7)
    picks = [rnd.choice(ids) for _ in range(50)]

    # Build the baseline via the legacy full-scan reconstruction.
    tbl = store.db.open_table("records")
    df = tbl.to_pandas()

    for rid in picks:
        got = store.get(rid)
        assert got is not None
        baseline_row = df[df["id"] == str(rid)].iloc[0].to_dict()
        baseline = store._from_row(baseline_row)

        assert got.id == baseline.id
        assert got.literal_surface == baseline.literal_surface
        assert list(got.embedding) == list(baseline.embedding)
        assert got.tags == baseline.tags
        assert got.provenance == baseline.provenance
        assert got.language == baseline.language
        assert got.community_id == baseline.community_id
        assert got.centrality == baseline.centrality
        assert got.stability == baseline.stability
        assert got.difficulty == baseline.difficulty
        assert got.last_reviewed == baseline.last_reviewed
        assert got.updated_at == baseline.updated_at