"""Plan 05-14 — async provenance write queue (OPS-10 / M-02). Moves provenance writes off the recall critical path via a daemon-thread queue so pipeline_recall returns before append_provenance_batch runs. All 6 tests below MUST FAIL on first run (RED) — the module `iai_mcp.provenance_queue` and the `MemoryStore.queue_provenance_batch` entry point do not exist yet. Constitutional fence: - preserved (every recall still appends a provenance entry; writes are async but not dropped). - Rule 1: provenance-write failure never blocks recall. - C3/C6: no external deps, pure stdlib. """ from __future__ import annotations import time from uuid import UUID import pytest from iai_mcp.store import MemoryStore from tests.test_store import _make # --------------------------------------------------------------------- P1, P2, P5 def test_enqueue_fast(tmp_path): """P1: ProvenanceWriteQueue.enqueue returns in <= 2ms even when worker is slowed. We artificially slow the underlying store.append_provenance_batch so that each flush takes 200ms; enqueue must NOT wait for it. """ from iai_mcp.provenance_queue import ProvenanceWriteQueue store = MemoryStore(path=tmp_path) r = _make() store.insert(r) # Wrap append_provenance_batch to be slow. real_batch = store.append_provenance_batch def slow_batch(pairs, records_cache=None): time.sleep(0.2) return real_batch(pairs, records_cache=records_cache) store.append_provenance_batch = slow_batch # type: ignore[method-assign] q = ProvenanceWriteQueue(store, coalesce_ms=50) q.start() try: t0 = time.perf_counter() q.enqueue([(r.id, {"ts": "x", "cue": "c", "session_id": "s"})]) elapsed_ms = (time.perf_counter() - t0) * 1000.0 assert elapsed_ms <= 20.0, f"enqueue took {elapsed_ms:.1f}ms (target <=2ms, headroom <=20ms)" finally: q.stop() def test_flush_drains(tmp_path): """P2: worker drains all pending pairs within 500ms after .flush().""" from iai_mcp.provenance_queue import ProvenanceWriteQueue store = MemoryStore(path=tmp_path) r = _make() store.insert(r) q = ProvenanceWriteQueue(store, coalesce_ms=50) q.start() try: for i in range(10): q.enqueue([(r.id, {"ts": f"t{i}", "cue": f"c{i}", "session_id": "s"})]) t0 = time.perf_counter() q.flush(timeout=2.0) elapsed_ms = (time.perf_counter() - t0) * 1000.0 assert elapsed_ms <= 500.0, f"flush took {elapsed_ms:.1f}ms (target <=500ms)" finally: q.stop() # All 10 entries should now be durable. got = store.get(r.id) assert got is not None assert len(got.provenance) == 10 def test_atexit_flush(tmp_path, monkeypatch): """P5: atexit hook flushes the queue on interpreter shutdown. We simulate by registering a queue, capturing the atexit handler it installs, calling it manually, and verifying the store is consistent afterward. """ import atexit as _atexit from iai_mcp.provenance_queue import ProvenanceWriteQueue captured: list = [] def _fake_register(fn, *a, **kw): captured.append(fn) return fn monkeypatch.setattr(_atexit, "register", _fake_register) store = MemoryStore(path=tmp_path) r = _make() store.insert(r) q = ProvenanceWriteQueue(store, coalesce_ms=50) q.start() q.enqueue([(r.id, {"ts": "t", "cue": "c", "session_id": "s"})]) # The atexit handler should have been registered during start(). assert captured, "ProvenanceWriteQueue.start() must register atexit flush" # Invoke the registered handler — it must drain + not raise. captured[0]() # After the handler runs, the provenance entry must be durable. got = store.get(r.id) assert got is not None assert len(got.provenance) == 1 q.stop() # ---------------------------------------------------------------------- P3, P4, P6 def test_pipeline_recall_does_not_block_on_merge_insert(tmp_path): """P3: pipeline_recall latency does NOT include merge_insert when queue is enabled. Setup: make append_provenance_batch artificially slow (150ms). With the queue enabled, pipeline_recall should return well under 100ms (the write is handed off). Without the queue it would be >=150ms. """ from iai_mcp.core import dispatch store = MemoryStore(path=tmp_path) r = _make() store.insert(r) # Warm call first — initialises embedders, opens LanceDB tables, etc. # so the timed call below only measures the hot path. dispatch( store, "memory_recall", {"cue": "warmup", "session_id": "s0", "cue_embedding": r.embedding}, ) # Enable the provenance queue. store.enable_provenance_queue(coalesce_ms=50) try: # Slow the actual batch write. real_batch = store.append_provenance_batch def slow_batch(pairs, records_cache=None): time.sleep(0.5) # 500ms slow write return real_batch(pairs, records_cache=records_cache) store.append_provenance_batch = slow_batch # type: ignore[method-assign] t0 = time.perf_counter() dispatch( store, "memory_recall", {"cue": "q", "session_id": "s1", "cue_embedding": r.embedding}, ) elapsed_ms = (time.perf_counter() - t0) * 1000.0 # Target: the 500ms slow write is off-path; the queue hands off so # pipeline_recall returns well before the write completes. We give # a very generous budget (400ms) to accommodate CI-hardware jitter # while still proving the write is NOT inline (inline would be # >= 500ms). assert elapsed_ms < 400.0, ( f"pipeline_recall blocked on merge_insert: {elapsed_ms:.1f}ms " f"(queue should hand off; target <400ms given 500ms slow write)" ) finally: store.disable_provenance_queue() def test_mem05_preserved_after_drain(tmp_path): """P4: after flush, store reflects all enqueued provenance entries in insertion order.""" from iai_mcp.core import dispatch store = MemoryStore(path=tmp_path) r = _make() store.insert(r) store.enable_provenance_queue(coalesce_ms=50) try: dispatch(store, "memory_recall", {"cue": "first", "session_id": "s1", "cue_embedding": r.embedding}) dispatch(store, "memory_recall", {"cue": "second", "session_id": "s2", "cue_embedding": r.embedding}) dispatch(store, "memory_recall", {"cue": "third", "session_id": "s3", "cue_embedding": r.embedding}) # Drain. store._provenance_queue.flush(timeout=2.0) # type: ignore[attr-defined] finally: store.disable_provenance_queue() got = store.get(r.id) assert got is not None assert len(got.provenance) == 3 cues = [p["cue"] for p in got.provenance] assert cues == ["first", "second", "third"], f"order violated: {cues}" def test_overflow_spill_round_trip(tmp_path, monkeypatch): """W1 / when _q is full, batches spill to ~/.iai-mcp/.provenance-overflow/ instead of dropping. The worker re-enqueues spilled batches on idle. holds under overload.""" import threading from iai_mcp.provenance_queue import ProvenanceWriteQueue # Init store BEFORE HOME redirect (keyring uses real HOME). store = MemoryStore(path=tmp_path / "store") r = _make() store.insert(r) monkeypatch.setenv("HOME", str(tmp_path)) # Throttle the worker's batch flush so _q fills up. flushed_pairs: list = [] flush_release = threading.Event() flush_release.clear() real_batch = store.append_provenance_batch def slow_batch(pairs, records_cache=None): # Block until the test releases; then call the real batch. flush_release.wait(timeout=10.0) flushed_pairs.extend(pairs) return real_batch(pairs, records_cache=records_cache) store.append_provenance_batch = slow_batch # type: ignore[method-assign] # Tiny queue so we hit overflow fast. q = ProvenanceWriteQueue(store, coalesce_ms=10, max_queue_size=2, max_batch_pairs=1) q.start() try: # Push 5 single-pair batches. The worker will pull the first, # block on slow_batch; _q at maxsize=2 fills with two more; # the remaining 2 must spill. for i in range(5): q.enqueue([(r.id, {"ts": f"t{i}", "cue": f"c{i}", "session_id": "sov"})]) # Give the spill writes a moment to land on disk. time.sleep(0.1) overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow" spilled_before_release = list(overflow_dir.glob("*.jsonl")) assert len(spilled_before_release) >= 1, ( f"expected at least 1 spilled file, got {len(spilled_before_release)} " f"(overflow dir contents: {list(overflow_dir.iterdir()) if overflow_dir.exists() else 'absent'})" ) # Release the worker — it drains _q first, then on idle ticks # picks up the overflow dir and re-enqueues spilled batches. flush_release.set() # Wait for the queue idle-poll cycle (5s) plus headroom — but # the immediate flush() pushes a sentinel that wakes it sooner. # We poll until overflow dir is empty OR timeout. deadline = time.time() + 12.0 while time.time() < deadline: if not list(overflow_dir.glob("*.jsonl")): break time.sleep(0.2) # Final flush + assertions. q.flush(timeout=2.0) finally: q.stop() # All 5 cues reached append_provenance_batch exactly once. flushed_cues = [p[1]["cue"] for p in flushed_pairs] assert sorted(flushed_cues) == [f"c{i}" for i in range(5)], ( f"expected all 5 cues flushed exactly once; got {sorted(flushed_cues)}" ) # Spill dir is empty (every file unlinked after re-enqueue + flush). overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow" assert list(overflow_dir.glob("*.jsonl")) == [], ( f"spill dir should be empty after drain; got {list(overflow_dir.iterdir())}" ) def test_overflow_dir_lazy_create(tmp_path, monkeypatch): """W1 / the overflow dir is created only on the first spill. Cold start with no overload must NOT create it.""" from iai_mcp.provenance_queue import ProvenanceWriteQueue # Build the store BEFORE redirecting HOME so MemoryStore init # uses the real keyring + env, then redirect HOME so the # overflow dir under HOME points to tmp. store = MemoryStore(path=tmp_path / "store") r = _make() store.insert(r) monkeypatch.setenv("HOME", str(tmp_path)) q = ProvenanceWriteQueue(store, coalesce_ms=50) q.start() try: q.enqueue([(r.id, {"ts": "t", "cue": "c", "session_id": "s"})]) q.flush(timeout=2.0) finally: q.stop() overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow" assert not overflow_dir.exists(), ( "overflow dir must not be created when no spill happens" ) def test_overflow_malformed_spill_file_quarantined(tmp_path, monkeypatch): """W1 / a malformed spill file is renamed .failed-.jsonl and does NOT block the drain loop.""" from iai_mcp.provenance_queue import ProvenanceWriteQueue # Init store BEFORE HOME redirect (keyring uses real HOME). store = MemoryStore(path=tmp_path / "store") monkeypatch.setenv("HOME", str(tmp_path)) overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow" overflow_dir.mkdir(parents=True) bad_file = overflow_dir / "bad.jsonl" bad_file.write_text("this is not valid json at all\n") q = ProvenanceWriteQueue(store, coalesce_ms=50) q.start() try: # Trigger an idle drain by waiting past the idle-poll boundary # (5s WORKER_IDLE_POLL_S + headroom). time.sleep(6.5) finally: q.stop() # Malformed file moved to .failed-*.jsonl assert not bad_file.exists() failed_files = list(overflow_dir.glob("*.failed-*.jsonl")) assert len(failed_files) == 1, ( f"expected 1 failed-quarantined file; got {len(failed_files)} " f"(overflow dir contents: {list(overflow_dir.iterdir())})" ) def test_queue_disabled_falls_back_to_sync(tmp_path): """P6: store.enable_provenance_queue() toggles behaviour — when disabled, pipeline_recall falls back to the sync append_provenance_batch path. Verify by monkey-patching append_provenance_batch to record calls and confirming it was called synchronously (on the caller thread) before dispatch returns. """ import threading from iai_mcp.core import dispatch store = MemoryStore(path=tmp_path) r = _make() store.insert(r) # Queue NOT enabled. assert getattr(store, "_provenance_queue", None) is None call_threads: list[int] = [] real_batch = store.append_provenance_batch def tracking_batch(pairs, records_cache=None): call_threads.append(threading.get_ident()) return real_batch(pairs, records_cache=records_cache) store.append_provenance_batch = tracking_batch # type: ignore[method-assign] main_ident = threading.get_ident() dispatch(store, "memory_recall", {"cue": "q", "session_id": "s1", "cue_embedding": r.embedding}) # Batch was called on the main thread (sync fallback). assert call_threads, "append_provenance_batch not called in sync fallback" assert call_threads[0] == main_ident, ( f"sync fallback ran on thread {call_threads[0]!r}, expected main {main_ident!r}" ) got = store.get(r.id) assert got is not None assert len(got.provenance) == 1