"""Phase 07.9 W4 / — pipeline._find_anti_hits defensive UUID parse. Pre-fix: a single malformed src/dst value in the edges table aborts ``_find_anti_hits`` at the inner ``UUID(lid)`` call, which in turn aborts the post-rank stage of ``_recall_core`` for any recall whose top hit is a contradicts-edge endpoint of the corrupted row. One bad edge poisons every recall that touches the contradicting hit until the row is repaired. Post-fix: ``_find_anti_hits`` filters edge rows whose src/dst cannot be parsed as UUID before walking, with structured-log observability per skip; the inner ``UUID(lid)`` is still wrapped defensively for mid- iteration corruption. Anti-hits is an enrichment signal — degrading to "no anti-hits" on corruption is always preferred over crashing. """ from __future__ import annotations import logging from datetime import datetime, timezone from pathlib import Path from uuid import UUID, uuid4 import pytest from iai_mcp.pipeline import _find_anti_hits from iai_mcp.store import MemoryStore from iai_mcp.types import EMBED_DIM, MemoryHit, MemoryRecord # --------------------------------------------------------------------------- fixtures @pytest.fixture(autouse=True) def _isolated_keyring(monkeypatch: pytest.MonkeyPatch): import keyring as _keyring fake: dict[tuple[str, str], str] = {} monkeypatch.setattr(_keyring, "get_password", lambda s, u: fake.get((s, u))) monkeypatch.setattr( _keyring, "set_password", lambda s, u, p: fake.__setitem__((s, u), p) ) monkeypatch.setattr( _keyring, "delete_password", lambda s, u: fake.pop((s, u), None) ) yield fake @pytest.fixture def store(tmp_path: Path) -> MemoryStore: return MemoryStore(path=tmp_path / "lancedb") def _make_record(rid: UUID, surface: str = "topic") -> MemoryRecord: now = datetime.now(timezone.utc) return MemoryRecord( id=rid, tier="episodic", literal_surface=surface, aaak_index="", embedding=[0.1] * EMBED_DIM, community_id=None, centrality=0.0, detail_level=2, pinned=False, stability=0.0, difficulty=0.0, last_reviewed=None, never_decay=False, never_merge=False, provenance=[], created_at=now, updated_at=now, tags=[], language="en", ) def _add_edge_row( store: MemoryStore, *, src: str, dst: str, edge_type: str = "contradicts", weight: float = 1.0, ) -> None: """Direct LanceDB insert for the edges table — used to inject rows that the high-level store APIs would normally validate away.""" tbl = store.db.open_table("edges") tbl.add([{ "src": src, "dst": dst, "edge_type": edge_type, "weight": float(weight), "updated_at": datetime.now(timezone.utc), }]) def _make_hit(rid: UUID, surface: str = "primary topic") -> MemoryHit: return MemoryHit( record_id=rid, score=0.9, reason="test_hit", literal_surface=surface, adjacent_suggestions=[], ) # --------------------------------------------------------------------------- W4 tests def test_malformed_dst_does_not_crash_and_valid_anti_surfaces(store, caplog): """W4 / a contradicts edge with dst='not-a-uuid' is filtered + logged; the valid contradicts edge still surfaces as an anti-hit.""" rid_hit = uuid4() rid_anti = uuid4() store.insert(_make_record(rid_hit, "primary topic")) store.insert(_make_record(rid_anti, "anti topic")) # One valid contradicts edge AND one with malformed dst. _add_edge_row(store, src=str(rid_hit), dst=str(rid_anti), edge_type="contradicts", weight=1.0) _add_edge_row(store, src=str(rid_hit), dst="not-a-uuid", edge_type="contradicts", weight=1.0) # MemoryGraph isn't actually consulted in _find_anti_hits per the # current implementation (it walks the edges table directly), but # the signature requires it. A minimal MemoryGraph satisfies the # type contract. from iai_mcp.graph import MemoryGraph graph = MemoryGraph() hit = _make_hit(rid_hit) with caplog.at_level(logging.WARNING, logger="iai_mcp.pipeline"): anti = _find_anti_hits([hit], store, graph, k=3, records_cache=None) # Recall did NOT crash. The valid anti-hit surfaced. assert len(anti) == 1, ( f"expected 1 valid anti-hit; got {len(anti)} " f"(records: {[h.record_id for h in anti]})" ) assert anti[0].record_id == rid_anti # Log captures the skip event for observability. assert any( "anti_hits_skip_malformed_edge" in r.getMessage() for r in caplog.records ), f"expected log line; got {[r.getMessage() for r in caplog.records]}" def test_malformed_src_filtered_at_upstream_step(store, caplog): """W4 / a contradicts edge with src='not-a-uuid' is also filtered at the upstream pre-walk step. ``linked`` set never sees the bad value and the inner UUID(lid) call is never reached.""" rid_hit = uuid4() rid_anti = uuid4() store.insert(_make_record(rid_hit)) store.insert(_make_record(rid_anti)) # Valid edge + malformed src. _add_edge_row(store, src=str(rid_hit), dst=str(rid_anti), edge_type="contradicts", weight=1.0) _add_edge_row(store, src="zzz-bad-src", dst=str(rid_hit), edge_type="contradicts", weight=1.0) from iai_mcp.graph import MemoryGraph graph = MemoryGraph() hit = _make_hit(rid_hit) with caplog.at_level(logging.WARNING, logger="iai_mcp.pipeline"): anti = _find_anti_hits([hit], store, graph, k=3, records_cache=None) # The valid anti-hit still surfaces. assert len(anti) == 1 assert anti[0].record_id == rid_anti # Upstream filter logged the skip; inner-lid log did NOT fire. assert any( "anti_hits_skip_malformed_edge" in r.getMessage() for r in caplog.records ) assert not any( "anti_hits_skip_malformed_lid" in r.getMessage() for r in caplog.records ), "upstream filter must remove bad rows before the inner UUID(lid) call" def test_no_contradicts_edges_returns_empty_clean(store): """W4 / control: a hit with no contradicts edges still returns [] without crashing. (No regression from the defensive filter on the all-clean path.)""" rid_hit = uuid4() store.insert(_make_record(rid_hit)) from iai_mcp.graph import MemoryGraph graph = MemoryGraph() hit = _make_hit(rid_hit) anti = _find_anti_hits([hit], store, graph, k=3, records_cache=None) assert anti == []