iai-mcp-opencode/tests/test_provenance_async.py

"""Plan 05-14 — async provenance write queue (OPS-10 / M-02).

Moves provenance writes off the recall critical path via a daemon-thread
queue so pipeline_recall returns before append_provenance_batch runs.

All 6 tests below MUST FAIL on first run (RED) — the module
`iai_mcp.provenance_queue` and the `MemoryStore.queue_provenance_batch`
entry point do not exist yet.

Constitutional fence:
- preserved (every recall still appends a provenance entry;
  writes are async but not dropped).
- Rule 1: provenance-write failure never blocks recall.
- C3/C6: no external deps, pure stdlib.
"""
from __future__ import annotations

import time
from uuid import UUID

import pytest

from iai_mcp.store import MemoryStore
from tests.test_store import _make


# --------------------------------------------------------------------- P1, P2, P5

def test_enqueue_fast(tmp_path):
    """P1: ProvenanceWriteQueue.enqueue returns in <= 2ms even when worker is slowed.

    We artificially slow the underlying store.append_provenance_batch so that
    each flush takes 200ms; enqueue must NOT wait for it.
    """
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    # Wrap append_provenance_batch to be slow.
    real_batch = store.append_provenance_batch

    def slow_batch(pairs, records_cache=None):
        time.sleep(0.2)
        return real_batch(pairs, records_cache=records_cache)

    store.append_provenance_batch = slow_batch  # type: ignore[method-assign]

    q = ProvenanceWriteQueue(store, coalesce_ms=50)
    q.start()
    try:
        t0 = time.perf_counter()
        q.enqueue([(r.id, {"ts": "x", "cue": "c", "session_id": "s"})])
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
        assert elapsed_ms <= 20.0, f"enqueue took {elapsed_ms:.1f}ms (target <=2ms, headroom <=20ms)"
    finally:
        q.stop()


def test_flush_drains(tmp_path):
    """P2: worker drains all pending pairs within 500ms after .flush()."""
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    q = ProvenanceWriteQueue(store, coalesce_ms=50)
    q.start()
    try:
        for i in range(10):
            q.enqueue([(r.id, {"ts": f"t{i}", "cue": f"c{i}", "session_id": "s"})])
        t0 = time.perf_counter()
        q.flush(timeout=2.0)
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
        assert elapsed_ms <= 500.0, f"flush took {elapsed_ms:.1f}ms (target <=500ms)"
    finally:
        q.stop()

    # All 10 entries should now be durable.
    got = store.get(r.id)
    assert got is not None
    assert len(got.provenance) == 10


def test_atexit_flush(tmp_path, monkeypatch):
    """P5: atexit hook flushes the queue on interpreter shutdown.

    We simulate by registering a queue, capturing the atexit handler
    it installs, calling it manually, and verifying the store is
    consistent afterward.
    """
    import atexit as _atexit
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    captured: list = []

    def _fake_register(fn, *a, **kw):
        captured.append(fn)
        return fn

    monkeypatch.setattr(_atexit, "register", _fake_register)

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    q = ProvenanceWriteQueue(store, coalesce_ms=50)
    q.start()
    q.enqueue([(r.id, {"ts": "t", "cue": "c", "session_id": "s"})])

    # The atexit handler should have been registered during start().
    assert captured, "ProvenanceWriteQueue.start() must register atexit flush"
    # Invoke the registered handler — it must drain + not raise.
    captured[0]()

    # After the handler runs, the provenance entry must be durable.
    got = store.get(r.id)
    assert got is not None
    assert len(got.provenance) == 1
    q.stop()


# ---------------------------------------------------------------------- P3, P4, P6

def test_pipeline_recall_does_not_block_on_merge_insert(tmp_path):
    """P3: pipeline_recall latency does NOT include merge_insert when queue is enabled.

    Setup: make append_provenance_batch artificially slow (150ms). With the
    queue enabled, pipeline_recall should return well under 100ms (the write
    is handed off). Without the queue it would be >=150ms.
    """
    from iai_mcp.core import dispatch

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    # Warm call first — initialises embedders, opens LanceDB tables, etc.
    # so the timed call below only measures the hot path.
    dispatch(
        store, "memory_recall",
        {"cue": "warmup", "session_id": "s0", "cue_embedding": r.embedding},
    )

    # Enable the provenance queue.
    store.enable_provenance_queue(coalesce_ms=50)
    try:
        # Slow the actual batch write.
        real_batch = store.append_provenance_batch

        def slow_batch(pairs, records_cache=None):
            time.sleep(0.5)  # 500ms slow write
            return real_batch(pairs, records_cache=records_cache)

        store.append_provenance_batch = slow_batch  # type: ignore[method-assign]

        t0 = time.perf_counter()
        dispatch(
            store,
            "memory_recall",
            {"cue": "q", "session_id": "s1", "cue_embedding": r.embedding},
        )
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
        # Target: the 500ms slow write is off-path; the queue hands off so
        # pipeline_recall returns well before the write completes. We give
        # a very generous budget (400ms) to accommodate CI-hardware jitter
        # while still proving the write is NOT inline (inline would be
        # >= 500ms).
        assert elapsed_ms < 400.0, (
            f"pipeline_recall blocked on merge_insert: {elapsed_ms:.1f}ms "
            f"(queue should hand off; target <400ms given 500ms slow write)"
        )
    finally:
        store.disable_provenance_queue()


def test_mem05_preserved_after_drain(tmp_path):
    """P4: after flush, store reflects all enqueued provenance entries in insertion order."""
    from iai_mcp.core import dispatch

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    store.enable_provenance_queue(coalesce_ms=50)
    try:
        dispatch(store, "memory_recall",
                 {"cue": "first", "session_id": "s1", "cue_embedding": r.embedding})
        dispatch(store, "memory_recall",
                 {"cue": "second", "session_id": "s2", "cue_embedding": r.embedding})
        dispatch(store, "memory_recall",
                 {"cue": "third", "session_id": "s3", "cue_embedding": r.embedding})
        # Drain.
        store._provenance_queue.flush(timeout=2.0)  # type: ignore[attr-defined]
    finally:
        store.disable_provenance_queue()

    got = store.get(r.id)
    assert got is not None
    assert len(got.provenance) == 3
    cues = [p["cue"] for p in got.provenance]
    assert cues == ["first", "second", "third"], f"order violated: {cues}"


def test_overflow_spill_round_trip(tmp_path, monkeypatch):
    """W1 / when _q is full, batches spill to
    ~/.iai-mcp/.provenance-overflow/ instead of dropping. The worker
    re-enqueues spilled batches on idle. holds under overload."""
    import threading
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    # Init store BEFORE HOME redirect (keyring uses real HOME).
    store = MemoryStore(path=tmp_path / "store")
    r = _make()
    store.insert(r)

    monkeypatch.setenv("HOME", str(tmp_path))

    # Throttle the worker's batch flush so _q fills up.
    flushed_pairs: list = []
    flush_release = threading.Event()
    flush_release.clear()
    real_batch = store.append_provenance_batch

    def slow_batch(pairs, records_cache=None):
        # Block until the test releases; then call the real batch.
        flush_release.wait(timeout=10.0)
        flushed_pairs.extend(pairs)
        return real_batch(pairs, records_cache=records_cache)

    store.append_provenance_batch = slow_batch  # type: ignore[method-assign]

    # Tiny queue so we hit overflow fast.
    q = ProvenanceWriteQueue(store, coalesce_ms=10, max_queue_size=2,
                             max_batch_pairs=1)
    q.start()
    try:
        # Push 5 single-pair batches. The worker will pull the first,
        # block on slow_batch; _q at maxsize=2 fills with two more;
        # the remaining 2 must spill.
        for i in range(5):
            q.enqueue([(r.id, {"ts": f"t{i}", "cue": f"c{i}",
                               "session_id": "sov"})])
        # Give the spill writes a moment to land on disk.
        time.sleep(0.1)
        overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"
        spilled_before_release = list(overflow_dir.glob("*.jsonl"))
        assert len(spilled_before_release) >= 1, (
            f"expected at least 1 spilled file, got {len(spilled_before_release)} "
            f"(overflow dir contents: {list(overflow_dir.iterdir()) if overflow_dir.exists() else 'absent'})"
        )
        # Release the worker — it drains _q first, then on idle ticks
        # picks up the overflow dir and re-enqueues spilled batches.
        flush_release.set()
        # Wait for the queue idle-poll cycle (5s) plus headroom — but
        # the immediate flush() pushes a sentinel that wakes it sooner.
        # We poll until overflow dir is empty OR timeout.
        deadline = time.time() + 12.0
        while time.time() < deadline:
            if not list(overflow_dir.glob("*.jsonl")):
                break
            time.sleep(0.2)
        # Final flush + assertions.
        q.flush(timeout=2.0)
    finally:
        q.stop()

    # All 5 cues reached append_provenance_batch exactly once.
    flushed_cues = [p[1]["cue"] for p in flushed_pairs]
    assert sorted(flushed_cues) == [f"c{i}" for i in range(5)], (
        f"expected all 5 cues flushed exactly once; got {sorted(flushed_cues)}"
    )
    # Spill dir is empty (every file unlinked after re-enqueue + flush).
    overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"
    assert list(overflow_dir.glob("*.jsonl")) == [], (
        f"spill dir should be empty after drain; got {list(overflow_dir.iterdir())}"
    )


def test_overflow_dir_lazy_create(tmp_path, monkeypatch):
    """W1 / the overflow dir is created only on the first spill.
    Cold start with no overload must NOT create it."""
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    # Build the store BEFORE redirecting HOME so MemoryStore init
    # uses the real keyring + env, then redirect HOME so the
    # overflow dir under HOME points to tmp.
    store = MemoryStore(path=tmp_path / "store")
    r = _make()
    store.insert(r)

    monkeypatch.setenv("HOME", str(tmp_path))

    q = ProvenanceWriteQueue(store, coalesce_ms=50)
    q.start()
    try:
        q.enqueue([(r.id, {"ts": "t", "cue": "c", "session_id": "s"})])
        q.flush(timeout=2.0)
    finally:
        q.stop()

    overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"
    assert not overflow_dir.exists(), (
        "overflow dir must not be created when no spill happens"
    )


def test_overflow_malformed_spill_file_quarantined(tmp_path, monkeypatch):
    """W1 / a malformed spill file is renamed .failed-<ts>.jsonl
    and does NOT block the drain loop."""
    from iai_mcp.provenance_queue import ProvenanceWriteQueue

    # Init store BEFORE HOME redirect (keyring uses real HOME).
    store = MemoryStore(path=tmp_path / "store")

    monkeypatch.setenv("HOME", str(tmp_path))
    overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"
    overflow_dir.mkdir(parents=True)
    bad_file = overflow_dir / "bad.jsonl"
    bad_file.write_text("this is not valid json at all\n")

    q = ProvenanceWriteQueue(store, coalesce_ms=50)
    q.start()
    try:
        # Trigger an idle drain by waiting past the idle-poll boundary
        # (5s WORKER_IDLE_POLL_S + headroom).
        time.sleep(6.5)
    finally:
        q.stop()

    # Malformed file moved to .failed-*.jsonl
    assert not bad_file.exists()
    failed_files = list(overflow_dir.glob("*.failed-*.jsonl"))
    assert len(failed_files) == 1, (
        f"expected 1 failed-quarantined file; got {len(failed_files)} "
        f"(overflow dir contents: {list(overflow_dir.iterdir())})"
    )


def test_queue_disabled_falls_back_to_sync(tmp_path):
    """P6: store.enable_provenance_queue() toggles behaviour — when disabled,
    pipeline_recall falls back to the sync append_provenance_batch path.

    Verify by monkey-patching append_provenance_batch to record calls and
    confirming it was called synchronously (on the caller thread) before
    dispatch returns.
    """
    import threading
    from iai_mcp.core import dispatch

    store = MemoryStore(path=tmp_path)
    r = _make()
    store.insert(r)

    # Queue NOT enabled.
    assert getattr(store, "_provenance_queue", None) is None

    call_threads: list[int] = []
    real_batch = store.append_provenance_batch

    def tracking_batch(pairs, records_cache=None):
        call_threads.append(threading.get_ident())
        return real_batch(pairs, records_cache=records_cache)

    store.append_provenance_batch = tracking_batch  # type: ignore[method-assign]

    main_ident = threading.get_ident()
    dispatch(store, "memory_recall",
             {"cue": "q", "session_id": "s1", "cue_embedding": r.embedding})

    # Batch was called on the main thread (sync fallback).
    assert call_threads, "append_provenance_batch not called in sync fallback"
    assert call_threads[0] == main_ident, (
        f"sync fallback ran on thread {call_threads[0]!r}, expected main {main_ident!r}"
    )

    got = store.get(r.id)
    assert got is not None
    assert len(got.provenance) == 1