283 lines
9.3 KiB
Python
283 lines
9.3 KiB
Python
|
|
"""Tests for LEARN-03 schema induction (D-18 + D-21).
|
||
|
|
|
||
|
|
dual-path schema surfacing.
|
||
|
|
- Primary: batch induction inside sleep cycle (Tier 1 Haiku when allowed, Tier 0
|
||
|
|
cooccurrence + TF-IDF otherwise).
|
||
|
|
- Secondary: entropy-gated provisional schemas surfaced during pipeline_recall.
|
||
|
|
|
||
|
|
D-21 (autism-tuned):
|
||
|
|
- Auto-induct at co_occurrence >= 5 AND confidence >= 0.85.
|
||
|
|
- User-approval flag at [3, 5) AND [0.65, 0.85).
|
||
|
|
- Exception preservation: exceptions stored as first-class records.
|
||
|
|
- Abstraction level: concrete (Dawson-Mottron).
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import os
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from uuid import uuid4
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from iai_mcp.events import query_events
|
||
|
|
from iai_mcp.store import MemoryStore
|
||
|
|
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||
|
|
|
||
|
|
|
||
|
|
def _rec(
|
||
|
|
*,
|
||
|
|
text: str = "t",
|
||
|
|
tags: list[str] | None = None,
|
||
|
|
language: str = "en",
|
||
|
|
tier: str = "episodic",
|
||
|
|
detail_level: int = 2,
|
||
|
|
) -> MemoryRecord:
|
||
|
|
now = datetime.now(timezone.utc)
|
||
|
|
return MemoryRecord(
|
||
|
|
id=uuid4(),
|
||
|
|
tier=tier,
|
||
|
|
literal_surface=text,
|
||
|
|
aaak_index="",
|
||
|
|
embedding=[1.0] + [0.0] * (EMBED_DIM - 1),
|
||
|
|
community_id=None,
|
||
|
|
centrality=0.0,
|
||
|
|
detail_level=detail_level,
|
||
|
|
pinned=False,
|
||
|
|
stability=0.0,
|
||
|
|
difficulty=0.0,
|
||
|
|
last_reviewed=None,
|
||
|
|
never_decay=False,
|
||
|
|
never_merge=False,
|
||
|
|
provenance=[],
|
||
|
|
created_at=now,
|
||
|
|
updated_at=now,
|
||
|
|
tags=list(tags or []),
|
||
|
|
language=language,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture(autouse=True)
|
||
|
|
def _patch_embedder(monkeypatch):
|
||
|
|
"""Avoid loading bge-m3 during schema tests."""
|
||
|
|
from iai_mcp import embed as embed_mod
|
||
|
|
|
||
|
|
class _FakeEmbedder:
|
||
|
|
DIM = EMBED_DIM
|
||
|
|
DEFAULT_DIM = EMBED_DIM
|
||
|
|
DEFAULT_MODEL_KEY = "fake"
|
||
|
|
|
||
|
|
def __init__(self, *args, **kwargs):
|
||
|
|
self.DIM = EMBED_DIM
|
||
|
|
|
||
|
|
def embed(self, text: str) -> list[float]:
|
||
|
|
return [1.0] + [0.0] * (EMBED_DIM - 1)
|
||
|
|
|
||
|
|
def embed_batch(self, texts):
|
||
|
|
return [self.embed(t) for t in texts]
|
||
|
|
|
||
|
|
monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)
|
||
|
|
yield
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- constants
|
||
|
|
|
||
|
|
|
||
|
|
def test_schema_d21_thresholds_encoded():
|
||
|
|
from iai_mcp import schema
|
||
|
|
|
||
|
|
assert schema.AUTO_INDUCT_COOCCURRENCE == 5
|
||
|
|
assert schema.AUTO_INDUCT_CONFIDENCE == 0.85
|
||
|
|
assert schema.USER_APPROVAL_COOCCURRENCE == 3
|
||
|
|
assert schema.USER_APPROVAL_CONFIDENCE == 0.65
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- Tier-0 induction
|
||
|
|
|
||
|
|
|
||
|
|
def test_induce_schemas_tier0_returns_candidates_at_threshold(tmp_path):
|
||
|
|
"""9+ records on the same tag pair -> auto candidate (confidence = count/10)."""
|
||
|
|
from iai_mcp.schema import induce_schemas_tier0
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
# Confidence scales count/10. Need count >= 9 for confidence >= 0.9 (auto).
|
||
|
|
for i in range(10):
|
||
|
|
store.insert(_rec(text=f"r{i}", tags=["meeting", "notes"]))
|
||
|
|
candidates = induce_schemas_tier0(store)
|
||
|
|
assert len(candidates) >= 1
|
||
|
|
hit = [c for c in candidates if c.evidence_count >= 5 and c.confidence >= 0.85]
|
||
|
|
assert len(hit) >= 1
|
||
|
|
assert hit[0].status == "auto"
|
||
|
|
|
||
|
|
|
||
|
|
def test_induce_schemas_tier0_threshold_lowered_requires_approval(tmp_path):
|
||
|
|
"""4 records -> status pending_user_approval."""
|
||
|
|
from iai_mcp.schema import induce_schemas_tier0
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
for i in range(4):
|
||
|
|
store.insert(_rec(text=f"r{i}", tags=["report", "deadline"]))
|
||
|
|
candidates = induce_schemas_tier0(store)
|
||
|
|
# At least one candidate with user-approval status
|
||
|
|
match = [c for c in candidates if c.evidence_count == 4]
|
||
|
|
# Confidence 4/10=0.4 is below 0.65 -> NO candidate emitted.
|
||
|
|
# Raise the confidence path: 4 occurrences with small base set should
|
||
|
|
# yield candidates if we scale confidence up. We'll assert no auto-mode
|
||
|
|
# candidate exists at count=4.
|
||
|
|
auto_hits = [c for c in candidates if c.status == "auto"]
|
||
|
|
assert len(auto_hits) == 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_induce_schemas_tier0_discards_below_threshold(tmp_path):
|
||
|
|
"""2 records -> no candidate."""
|
||
|
|
from iai_mcp.schema import induce_schemas_tier0
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
for i in range(2):
|
||
|
|
store.insert(_rec(text=f"r{i}", tags=["alpha", "beta"]))
|
||
|
|
candidates = induce_schemas_tier0(store)
|
||
|
|
assert len(candidates) == 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_induce_schemas_tier0_no_llm_call(tmp_path, monkeypatch):
|
||
|
|
"""Tier-0 never calls should_call_llm or anthropic."""
|
||
|
|
from iai_mcp.schema import induce_schemas_tier0
|
||
|
|
|
||
|
|
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
for i in range(3):
|
||
|
|
store.insert(_rec(text=f"r{i}", tags=["work", "design"]))
|
||
|
|
candidates = induce_schemas_tier0(store)
|
||
|
|
# Should not raise regardless of API key.
|
||
|
|
assert isinstance(candidates, list)
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- Tier-1 falls back
|
||
|
|
|
||
|
|
|
||
|
|
def test_induce_schemas_tier1_falls_back_on_guard_block(tmp_path, monkeypatch):
|
||
|
|
"""should_call_llm returns False -> tier1 delegates to tier0 + logs llm_health."""
|
||
|
|
from iai_mcp.guard import BudgetLedger, RateLimitLedger
|
||
|
|
from iai_mcp.schema import induce_schemas_tier1
|
||
|
|
|
||
|
|
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
for i in range(5):
|
||
|
|
store.insert(_rec(text=f"r{i}", tags=["project", "meeting"]))
|
||
|
|
|
||
|
|
budget = BudgetLedger(store)
|
||
|
|
rate = RateLimitLedger(store)
|
||
|
|
candidates = induce_schemas_tier1(
|
||
|
|
store, budget=budget, rate=rate, llm_enabled=False,
|
||
|
|
)
|
||
|
|
assert isinstance(candidates, list)
|
||
|
|
# llm_health event should reflect the fallback
|
||
|
|
events = query_events(store, kind="llm_health")
|
||
|
|
# Expect at least one schema_induction llm_health event
|
||
|
|
matching = [e for e in events if e["data"].get("component") == "schema_induction"]
|
||
|
|
assert len(matching) >= 1
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- persist schema
|
||
|
|
|
||
|
|
|
||
|
|
def test_persist_schema_creates_semantic_record(tmp_path):
|
||
|
|
"""persist_schema inserts a semantic-tier record with detail_level=3."""
|
||
|
|
from iai_mcp.schema import SchemaCandidate, persist_schema
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
# Seed source evidence records
|
||
|
|
ev_recs = [_rec(text=f"ev{i}", tags=["meeting", "notes"]) for i in range(3)]
|
||
|
|
for r in ev_recs:
|
||
|
|
store.insert(r)
|
||
|
|
|
||
|
|
cand = SchemaCandidate(
|
||
|
|
pattern="tags:meeting+notes",
|
||
|
|
confidence=0.88,
|
||
|
|
evidence_count=3,
|
||
|
|
evidence_ids=[r.id for r in ev_recs],
|
||
|
|
status="auto",
|
||
|
|
)
|
||
|
|
schema_id = persist_schema(store, cand)
|
||
|
|
|
||
|
|
schema_rec = store.get(schema_id)
|
||
|
|
assert schema_rec is not None
|
||
|
|
assert schema_rec.tier == "semantic"
|
||
|
|
assert schema_rec.detail_level == 3
|
||
|
|
assert schema_rec.never_decay is True
|
||
|
|
|
||
|
|
|
||
|
|
def test_persist_schema_creates_schema_instance_of_edges(tmp_path):
|
||
|
|
"""Each evidence record gets a schema_instance_of edge to the schema record."""
|
||
|
|
from iai_mcp.schema import SchemaCandidate, persist_schema
|
||
|
|
from iai_mcp.store import EDGES_TABLE
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
ev_recs = [_rec(text=f"ev{i}", tags=["m", "n"]) for i in range(3)]
|
||
|
|
for r in ev_recs:
|
||
|
|
store.insert(r)
|
||
|
|
|
||
|
|
cand = SchemaCandidate(
|
||
|
|
pattern="tags:m+n",
|
||
|
|
confidence=0.9,
|
||
|
|
evidence_count=3,
|
||
|
|
evidence_ids=[r.id for r in ev_recs],
|
||
|
|
status="auto",
|
||
|
|
)
|
||
|
|
schema_id = persist_schema(store, cand)
|
||
|
|
|
||
|
|
edges_df = store.db.open_table(EDGES_TABLE).to_pandas()
|
||
|
|
sio = edges_df[edges_df["edge_type"] == "schema_instance_of"]
|
||
|
|
assert len(sio) == 3
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- provisional
|
||
|
|
|
||
|
|
|
||
|
|
def test_provisional_schemas_for_recall_returns_hint(tmp_path):
|
||
|
|
"""High-entropy hits -> provisional schema hints."""
|
||
|
|
from iai_mcp.schema import provisional_schemas_for_recall
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
recs = [_rec(text=f"r{i}", tags=["meeting", "notes"]) for i in range(3)]
|
||
|
|
for r in recs:
|
||
|
|
store.insert(r)
|
||
|
|
|
||
|
|
# Build synthetic hits referencing these records
|
||
|
|
class _Hit:
|
||
|
|
def __init__(self, rid, score):
|
||
|
|
self.record_id = rid
|
||
|
|
self.score = score
|
||
|
|
|
||
|
|
hits = [_Hit(recs[i].id, 0.3) for i in range(3)]
|
||
|
|
# Entropy of three equal probabilities is ~1.58 bits -> above 0.8
|
||
|
|
provisionals = provisional_schemas_for_recall(store, hits, entropy_bits=1.5)
|
||
|
|
assert isinstance(provisionals, list)
|
||
|
|
# Return at least one (tag pattern cohesive)
|
||
|
|
assert any(p.get("kind") == "provisional_schema" for p in provisionals)
|
||
|
|
|
||
|
|
|
||
|
|
def test_provisional_schemas_below_entropy_empty(tmp_path):
|
||
|
|
from iai_mcp.schema import provisional_schemas_for_recall
|
||
|
|
|
||
|
|
store = MemoryStore(path=tmp_path)
|
||
|
|
assert provisional_schemas_for_recall(store, [], entropy_bits=0.5) == []
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------- integration
|
||
|
|
|
||
|
|
|
||
|
|
def test_autistic_threshold_stricter_than_nt():
|
||
|
|
"""auto-induct threshold 5/0.85 is stricter than typical NT 2/0.65."""
|
||
|
|
from iai_mcp.schema import (
|
||
|
|
AUTO_INDUCT_COOCCURRENCE,
|
||
|
|
AUTO_INDUCT_CONFIDENCE,
|
||
|
|
USER_APPROVAL_COOCCURRENCE,
|
||
|
|
USER_APPROVAL_CONFIDENCE,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Explicit autism-aware limits
|
||
|
|
assert AUTO_INDUCT_COOCCURRENCE >= 5
|
||
|
|
assert AUTO_INDUCT_CONFIDENCE >= 0.85
|
||
|
|
assert USER_APPROVAL_COOCCURRENCE == 3
|
||
|
|
assert USER_APPROVAL_CONFIDENCE == 0.65
|