Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
199 lines
6.5 KiB
Python
199 lines
6.5 KiB
Python
"""Phase 07.9 W4 / — pipeline._find_anti_hits defensive UUID parse.
|
|
|
|
Pre-fix: a single malformed src/dst value in the edges table aborts
|
|
``_find_anti_hits`` at the inner ``UUID(lid)`` call, which in turn
|
|
aborts the post-rank stage of ``_recall_core`` for any recall whose
|
|
top hit is a contradicts-edge endpoint of the corrupted row. One bad
|
|
edge poisons every recall that touches the contradicting hit until
|
|
the row is repaired.
|
|
|
|
Post-fix: ``_find_anti_hits`` filters edge rows whose src/dst cannot be
|
|
parsed as UUID before walking, with structured-log observability per
|
|
skip; the inner ``UUID(lid)`` is still wrapped defensively for mid-
|
|
iteration corruption. Anti-hits is an enrichment signal — degrading
|
|
to "no anti-hits" on corruption is always preferred over crashing.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from uuid import UUID, uuid4
|
|
|
|
import pytest
|
|
|
|
from iai_mcp.pipeline import _find_anti_hits
|
|
from iai_mcp.store import MemoryStore
|
|
from iai_mcp.types import EMBED_DIM, MemoryHit, MemoryRecord
|
|
|
|
|
|
# --------------------------------------------------------------------------- fixtures
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Redirect the ``keyring`` password functions to an in-memory dict.

    Runs automatically for every test in this module. ``get_password``,
    ``set_password`` and ``delete_password`` on the ``keyring`` module are
    monkeypatched so they read and write a local dict keyed by the pair
    of their first two positional arguments.

    Yields:
        The backing dict, so a test can inspect or pre-seed entries.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        # Missing entries resolve to None.
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        # Deleting an absent entry is a silent no-op.
        return fake.pop((s, u), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, stand_in in replacements:
        monkeypatch.setattr(_keyring, attr_name, stand_in)

    yield fake
|
|
|
|
|
|
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Build a ``MemoryStore`` whose path is ``<tmp_path>/lancedb``.

    ``tmp_path`` is pytest's per-test temporary directory, so each test
    gets its own store location.
    """
    db_dir = tmp_path / "lancedb"
    return MemoryStore(path=db_dir)
|
|
|
|
|
|
def _make_record(rid: UUID, surface: str = "topic") -> MemoryRecord:
    """Build an episodic ``MemoryRecord`` with fixed placeholder fields.

    Args:
        rid: Identifier assigned to the record.
        surface: Value stored as ``literal_surface``.

    Returns:
        A record whose embedding is ``EMBED_DIM`` copies of ``0.1`` and
        whose ``created_at`` / ``updated_at`` share one UTC timestamp
        taken at call time.
    """
    stamp = datetime.now(timezone.utc)
    fields = {
        "id": rid,
        "tier": "episodic",
        "literal_surface": surface,
        "aaak_index": "",
        "embedding": [0.1] * EMBED_DIM,
        "community_id": None,
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance": [],
        "created_at": stamp,
        "updated_at": stamp,
        "tags": [],
        "language": "en",
    }
    return MemoryRecord(**fields)
|
|
|
|
|
|
def _add_edge_row(
    store: MemoryStore,
    *,
    src: str,
    dst: str,
    edge_type: str = "contradicts",
    weight: float = 1.0,
) -> None:
    """Append one raw row to the ``edges`` table.

    The row is written through ``store.db.open_table("edges")`` directly,
    so tests can inject rows that the high-level store APIs would
    normally validate away (for example non-UUID ``src`` / ``dst``).

    Args:
        store: Store whose ``db`` handle exposes the ``edges`` table.
        src: Raw source endpoint, stored exactly as given.
        dst: Raw destination endpoint, stored exactly as given.
        edge_type: Value for the ``edge_type`` column.
        weight: Edge weight; coerced to ``float`` before insertion.
    """
    edges = store.db.open_table("edges")
    row = {
        "src": src,
        "dst": dst,
        "edge_type": edge_type,
        "weight": float(weight),
        "updated_at": datetime.now(timezone.utc),
    }
    edges.add([row])
|
|
|
|
|
|
def _make_hit(rid: UUID, surface: str = "primary topic") -> MemoryHit:
    """Build a ``MemoryHit`` for ``rid`` with fixed score and reason.

    Args:
        rid: Record id the hit points at.
        surface: Value stored as ``literal_surface``.

    Returns:
        A hit with score ``0.9``, reason ``"test_hit"`` and no adjacent
        suggestions.
    """
    fields = {
        "record_id": rid,
        "score": 0.9,
        "reason": "test_hit",
        "literal_surface": surface,
        "adjacent_suggestions": [],
    }
    return MemoryHit(**fields)
|
|
|
|
|
|
# --------------------------------------------------------------------------- W4 tests
|
|
|
|
|
|
def test_malformed_dst_does_not_crash_and_valid_anti_surfaces(store, caplog):
    """W4: a contradicts edge with dst='not-a-uuid' is skipped and logged.

    Two contradicts edges leave the hit record: one to a real record and
    one whose ``dst`` is not a UUID. The call must return exactly the
    valid anti-hit and emit an ``anti_hits_skip_malformed_edge`` message.
    """
    hit_id = uuid4()
    anti_id = uuid4()
    store.insert(_make_record(hit_id, "primary topic"))
    store.insert(_make_record(anti_id, "anti topic"))

    # The valid destination first, then the malformed one.
    for dst_value in (str(anti_id), "not-a-uuid"):
        _add_edge_row(
            store,
            src=str(hit_id),
            dst=dst_value,
            edge_type="contradicts",
            weight=1.0,
        )

    # Per the original author's note, _find_anti_hits walks the edges
    # table directly and does not consult the graph; an empty MemoryGraph
    # is passed only because the signature requires one.
    from iai_mcp.graph import MemoryGraph

    graph = MemoryGraph()
    hit = _make_hit(hit_id)

    def _logged(token: str) -> bool:
        # True if any captured log message contains ``token``.
        return any(token in rec.getMessage() for rec in caplog.records)

    with caplog.at_level(logging.WARNING, logger="iai_mcp.pipeline"):
        anti = _find_anti_hits([hit], store, graph, k=3, records_cache=None)

    # No crash, and only the well-formed edge yields an anti-hit.
    assert len(anti) == 1, (
        f"expected 1 valid anti-hit; got {len(anti)} "
        f"(records: {[found.record_id for found in anti]})"
    )
    assert anti[0].record_id == anti_id

    # The skip must be observable in the log.
    assert _logged("anti_hits_skip_malformed_edge"), (
        f"expected log line; got {[rec.getMessage() for rec in caplog.records]}"
    )
|
|
|
|
|
|
def test_malformed_src_filtered_at_upstream_step(store, caplog):
    """W4: a contradicts edge with src='zzz-bad-src' is dropped pre-walk.

    The test expects the malformed row to be reported as
    ``anti_hits_skip_malformed_edge`` and never as
    ``anti_hits_skip_malformed_lid``, i.e. the bad value is removed
    before the inner ``UUID(lid)`` parse is reached.
    """
    hit_id = uuid4()
    anti_id = uuid4()
    for rid in (hit_id, anti_id):
        store.insert(_make_record(rid))

    # (src, dst) pairs: the valid edge first, then the malformed src.
    edge_endpoints = (
        (str(hit_id), str(anti_id)),
        ("zzz-bad-src", str(hit_id)),
    )
    for src_value, dst_value in edge_endpoints:
        _add_edge_row(
            store,
            src=src_value,
            dst=dst_value,
            edge_type="contradicts",
            weight=1.0,
        )

    from iai_mcp.graph import MemoryGraph

    graph = MemoryGraph()
    hit = _make_hit(hit_id)

    def _logged(token: str) -> bool:
        # True if any captured log message contains ``token``.
        return any(token in rec.getMessage() for rec in caplog.records)

    with caplog.at_level(logging.WARNING, logger="iai_mcp.pipeline"):
        anti = _find_anti_hits([hit], store, graph, k=3, records_cache=None)

    # The well-formed edge still produces its anti-hit.
    assert len(anti) == 1
    assert anti[0].record_id == anti_id

    # The upstream filter logged the skip; the inner-lid path stayed quiet.
    assert _logged("anti_hits_skip_malformed_edge")
    assert not _logged(
        "anti_hits_skip_malformed_lid"
    ), "upstream filter must remove bad rows before the inner UUID(lid) call"
|
|
|
|
|
|
def test_no_contradicts_edges_returns_empty_clean(store):
    """W4 control: a hit with no contradicts edges yields ``[]``.

    Guards the all-clean path against regressions from the defensive
    filter: with one record and no edge rows added, the call must return
    an empty list without raising.
    """
    hit_id = uuid4()
    store.insert(_make_record(hit_id))

    from iai_mcp.graph import MemoryGraph

    graph = MemoryGraph()
    hits = [_make_hit(hit_id)]

    anti = _find_anti_hits(hits, store, graph, k=3, records_cache=None)
    assert anti == []
|