Initial release: iai-mcp v0.1.0

Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
2026-05-06 01:04:47 -07:00 · 2026-05-06 01:04:47 -07:00 · f6b876fbe7
commit f6b876fbe7
332 changed files with 97258 additions and 0 deletions
--- a/bench/contradiction_longitudinal.py
+++ b/bench/contradiction_longitudinal.py
@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+"""Contradiction-longitudinal falsifiability bench (skeleton + pre-registered criteria).
+
+**Do not run on the construction host by default** — this module is meant for a
+dedicated bench machine with an isolated ``IAI_MCP_STORE`` and optional GPU.
+
+Pre-registered pass criteria:
+- **Metric B (post-flip):** cues issued after session ``t_0`` (contradiction +
+  consolidation window simulated) must rank the *current* winning fact above
+  flat cosine-only retrieval on the same store slice.
+- **Metric A (historical verbatim):** probes asking for superseded wording must
+  still surface the archived surface (verbatim MEM-06), not the post-flip fact alone.
+- **Regression gate:** pipeline score on B must beat cosine baseline; A must not
+  collapse below a configured verbatim hit threshold.
+
+This file loads :file:`fixtures/contradiction_longitudinal.jsonl` (synthetic JSONL
+rows: ``session``, ``text``, optional ``probe`` / ``expects``) and documents the
+evaluation harness contract. A full implementation wires:
+
+1. Fixture loader → ``MemoryStore`` inserts per session order.
+2. Explicit ``memory_contradict`` (or edge-equivalent) at ``t_0``.
+3. Optional sleep/consolidation tick simulation (bench-only knobs).
+4. Two eval slices: ``pre_flip_cues`` vs ``post_flip_cues`` with separated metrics.
+
+Exit code 0 only when all gates pass; non-zero on any failure. Until the harness
+is completed, ``main()`` prints the criteria and exits with code 2 to avoid a
+silent green run::
+
+    python bench/contradiction_longitudinal.py --fixture bench/fixtures/contradiction_longitudinal.jsonl
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+def load_rows(path: Path) -> list[dict]:
+    rows: list[dict] = []
+    with path.open(encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            rows.append(json.loads(line))
+    return rows
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
+    parser.add_argument(
+        "--fixture",
+        type=Path,
+        default=Path(__file__).resolve().parent / "fixtures" / "contradiction_longitudinal.jsonl",
+    )
+    args = parser.parse_args(argv)
+    rows = load_rows(args.fixture)
+    print(
+        json.dumps(
+            {
+                "loaded_rows": len(rows),
+                "fixture": str(args.fixture),
+                "status": "harness_stub",
+                "criteria": [
+                    "B: post-flip cues — pipeline beats flat cosine",
+                    "A: historical verbatim probes — superseded text still retrievable",
+                    "No regression: B gain without A collapse",
+                ],
+            },
+            indent=2,
+        )
+    )
+    # Stub: full eval is intentionally absent so CI never runs heavy retrieval.
+    return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())