iai-mcp-opencode/tests/test_schema_induction.py

"""Tests for LEARN-03 schema induction (D-18 + D-21).

dual-path schema surfacing.
- Primary: batch induction inside sleep cycle (Tier 1 Haiku when allowed, Tier 0
  cooccurrence + TF-IDF otherwise).
- Secondary: entropy-gated provisional schemas surfaced during pipeline_recall.

D-21 (autism-tuned):
- Auto-induct at co_occurrence >= 5 AND confidence >= 0.85.
- User-approval flag at [3, 5) AND [0.65, 0.85).
- Exception preservation: exceptions stored as first-class records.
- Abstraction level: concrete (Dawson-Mottron).
"""
from __future__ import annotations

import os
from datetime import datetime, timezone
from uuid import uuid4

import pytest

from iai_mcp.events import query_events
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord


def _rec(
    *,
    text: str = "t",
    tags: list[str] | None = None,
    language: str = "en",
    tier: str = "episodic",
    detail_level: int = 2,
) -> MemoryRecord:
    """Build a throwaway ``MemoryRecord`` for the schema tests.

    All arguments are keyword-only. ``text`` becomes the record's
    ``literal_surface``; ``tags`` is copied into a new list (``None`` or an
    empty list both yield ``[]``). Every record gets a fresh UUID, the same
    fixed embedding (1.0 in the first slot, zeros elsewhere, ``EMBED_DIM``
    long) and identical ``created_at`` / ``updated_at`` timestamps taken
    from the current UTC time. The remaining fields are set to fixed
    neutral values.
    """
    timestamp = datetime.now(timezone.utc)
    unit_vector = [1.0] + [0.0] * (EMBED_DIM - 1)
    tag_copy = list(tags) if tags else []

    fields = dict(
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        embedding=unit_vector,
        community_id=None,
        centrality=0.0,
        detail_level=detail_level,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=timestamp,
        updated_at=timestamp,
        tags=tag_copy,
        language=language,
    )
    return MemoryRecord(**fields)


@pytest.fixture(autouse=True)
def _patch_embedder(monkeypatch):
    """Replace ``iai_mcp.embed.Embedder`` with a stub for every test here.

    The fixture is autouse, so each test in this module runs with the stub
    installed; ``monkeypatch`` restores the real class afterwards. The
    original note gives the motivation: avoid loading bge-m3 during schema
    tests.
    """
    from iai_mcp import embed as embed_mod

    def _unit_vector() -> list[float]:
        # Same fixed vector for every input: 1.0 first, zeros after.
        return [1.0] + [0.0] * (EMBED_DIM - 1)

    class _FakeEmbedder:
        # Class-level attributes mirrored by the stub.
        DIM = EMBED_DIM
        DEFAULT_DIM = EMBED_DIM
        DEFAULT_MODEL_KEY = "fake"

        def __init__(self, *args, **kwargs):
            # Accept and ignore any constructor arguments.
            self.DIM = EMBED_DIM

        def embed(self, text: str) -> list[float]:
            return _unit_vector()

        def embed_batch(self, texts):
            return list(map(self.embed, texts))

    monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)
    yield


# ---------------------------------------------------------------- constants


def test_schema_d21_thresholds_encoded():
    """The four D-21 threshold constants in ``iai_mcp.schema`` have the expected values."""
    from iai_mcp import schema

    expected_constants = {
        "AUTO_INDUCT_COOCCURRENCE": 5,
        "AUTO_INDUCT_CONFIDENCE": 0.85,
        "USER_APPROVAL_COOCCURRENCE": 3,
        "USER_APPROVAL_CONFIDENCE": 0.65,
    }
    for constant_name, expected_value in expected_constants.items():
        assert getattr(schema, constant_name) == expected_value


# ---------------------------------------------------------------- Tier-0 induction


def test_induce_schemas_tier0_returns_candidates_at_threshold(tmp_path):
    """Ten records sharing one tag pair produce an "auto" candidate.

    The first candidate with ``evidence_count >= 5`` and
    ``confidence >= 0.85`` must have status "auto".
    """
    from iai_mcp.schema import induce_schemas_tier0

    store = MemoryStore(path=tmp_path)
    # NOTE(review): the original comment says Tier-0 confidence scales as
    # count/10; that formula is not visible from this file.
    for idx in range(10):
        store.insert(_rec(text=f"r{idx}", tags=["meeting", "notes"]))

    found = induce_schemas_tier0(store)
    assert len(found) >= 1

    first_qualifying = next(
        (c for c in found if c.evidence_count >= 5 and c.confidence >= 0.85),
        None,
    )
    assert first_qualifying is not None
    assert first_qualifying.status == "auto"


def test_induce_schemas_tier0_threshold_lowered_requires_approval(tmp_path):
    """Four co-occurrences of a tag pair must never be auto-inducted.

    Four is below the auto-induction co-occurrence threshold of 5 given in
    the module docstring, so none of the candidates Tier-0 returns may have
    status "auto".

    The test deliberately does NOT assert that a pending-approval candidate
    exists. Per the original author's note, Tier-0 confidence at this count
    (4/10 = 0.4) is below the 0.65 approval threshold, so Tier-0 may emit
    no candidate at all.
    """
    from iai_mcp.schema import induce_schemas_tier0

    store = MemoryStore(path=tmp_path)
    for i in range(4):
        store.insert(_rec(text=f"r{i}", tags=["report", "deadline"]))
    candidates = induce_schemas_tier0(store)

    auto_hits = [c for c in candidates if c.status == "auto"]
    assert len(auto_hits) == 0


def test_induce_schemas_tier0_discards_below_threshold(tmp_path):
    """Two records sharing a tag pair yield no candidates at all."""
    from iai_mcp.schema import induce_schemas_tier0

    store = MemoryStore(path=tmp_path)
    for label in ("r0", "r1"):
        store.insert(_rec(text=label, tags=["alpha", "beta"]))

    result = induce_schemas_tier0(store)
    assert len(result) == 0


def test_induce_schemas_tier0_no_llm_call(tmp_path, monkeypatch):
    """Tier-0 induction returns a list with ``ANTHROPIC_API_KEY`` unset.

    The stated intent is that Tier-0 never consults ``should_call_llm`` or
    Anthropic. What is actually verified is narrower: with the API key
    removed from the environment, the call does not raise and returns a
    list.
    """
    from iai_mcp.schema import induce_schemas_tier0

    # Removing the key is a no-op when it is already absent.
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    store = MemoryStore(path=tmp_path)
    for idx in range(3):
        store.insert(_rec(text=f"r{idx}", tags=["work", "design"]))

    outcome = induce_schemas_tier0(store)
    assert isinstance(outcome, list)


# ---------------------------------------------------------------- Tier-1 falls back


def test_induce_schemas_tier1_falls_back_on_guard_block(tmp_path, monkeypatch):
    """Tier-1 with the LLM disabled returns a list and logs an llm_health event.

    ``induce_schemas_tier1`` is called with ``llm_enabled=False`` and no API
    key in the environment. It must return a list, and at least one
    ``llm_health`` event whose data has ``component == "schema_induction"``
    must be recorded. The original docstring describes this as tier1
    delegating to tier0 when ``should_call_llm`` returns False.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.schema import induce_schemas_tier1

    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    store = MemoryStore(path=tmp_path)
    for idx in range(5):
        store.insert(_rec(text=f"r{idx}", tags=["project", "meeting"]))

    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)
    result = induce_schemas_tier1(
        store,
        budget=budget_ledger,
        rate=rate_ledger,
        llm_enabled=False,
    )
    assert isinstance(result, list)

    # Keep only the health events emitted by the schema-induction component.
    health_events = query_events(store, kind="llm_health")
    from_induction = [
        event
        for event in health_events
        if event["data"].get("component") == "schema_induction"
    ]
    assert len(from_induction) >= 1


# ---------------------------------------------------------------- persist schema


def test_persist_schema_creates_semantic_record(tmp_path):
    """``persist_schema`` stores a retrievable semantic record.

    The record fetched by the returned id must exist, be in the "semantic"
    tier, have ``detail_level == 3`` and have ``never_decay`` set to True.
    """
    from iai_mcp.schema import SchemaCandidate, persist_schema

    store = MemoryStore(path=tmp_path)

    # Seed the evidence records the candidate refers to.
    evidence = []
    for idx in range(3):
        record = _rec(text=f"ev{idx}", tags=["meeting", "notes"])
        evidence.append(record)
    for record in evidence:
        store.insert(record)

    candidate = SchemaCandidate(
        pattern="tags:meeting+notes",
        confidence=0.88,
        evidence_count=3,
        evidence_ids=[record.id for record in evidence],
        status="auto",
    )
    stored = store.get(persist_schema(store, candidate))

    assert stored is not None
    assert stored.tier == "semantic"
    assert stored.detail_level == 3
    assert stored.never_decay is True


def test_persist_schema_creates_schema_instance_of_edges(tmp_path):
    """Persisting a 3-evidence candidate leaves three schema_instance_of edges.

    Only the number of edges with ``edge_type == "schema_instance_of"`` is
    checked. The endpoints of those edges are not inspected here.
    """
    from iai_mcp.schema import SchemaCandidate, persist_schema
    from iai_mcp.store import EDGES_TABLE

    store = MemoryStore(path=tmp_path)
    ev_recs = [_rec(text=f"ev{i}", tags=["m", "n"]) for i in range(3)]
    for r in ev_recs:
        store.insert(r)

    cand = SchemaCandidate(
        pattern="tags:m+n",
        confidence=0.9,
        evidence_count=3,
        evidence_ids=[r.id for r in ev_recs],
        status="auto",
    )
    # The returned schema id is not needed: the assertion reads the edges
    # table directly.
    persist_schema(store, cand)

    edges_df = store.db.open_table(EDGES_TABLE).to_pandas()
    sio = edges_df[edges_df["edge_type"] == "schema_instance_of"]
    assert len(sio) == 3


# ---------------------------------------------------------------- provisional


def test_provisional_schemas_for_recall_returns_hint(tmp_path):
    """Three same-tagged hits at ``entropy_bits=1.5`` yield a provisional hint.

    The result must be a list containing at least one mapping whose
    ``"kind"`` is ``"provisional_schema"``.
    """
    from iai_mcp.schema import provisional_schemas_for_recall

    store = MemoryStore(path=tmp_path)
    recs = []
    for idx in range(3):
        recs.append(_rec(text=f"r{idx}", tags=["meeting", "notes"]))
    for rec in recs:
        store.insert(rec)

    # Minimal stand-in for a recall hit: just a record id and a score.
    class _Hit:
        def __init__(self, rid, score):
            self.record_id = rid
            self.score = score

    hits = [_Hit(rec.id, 0.3) for rec in recs]

    # The original note puts the entropy of three equal probabilities at
    # about 1.58 bits and the gate at 0.8; 1.5 is passed explicitly.
    hints = provisional_schemas_for_recall(store, hits, entropy_bits=1.5)
    assert isinstance(hints, list)

    kinds = [hint.get("kind") for hint in hints]
    assert "provisional_schema" in kinds


def test_provisional_schemas_below_entropy_empty(tmp_path):
    """No hits and ``entropy_bits=0.5`` on an empty store yield an empty list."""
    from iai_mcp.schema import provisional_schemas_for_recall

    store = MemoryStore(path=tmp_path)
    hints = provisional_schemas_for_recall(store, [], entropy_bits=0.5)
    assert hints == []


# ---------------------------------------------------------------- integration


def test_autistic_threshold_stricter_than_nt():
    """Auto-induct thresholds are at least 5 / 0.85; approval thresholds are 3 / 0.65.

    The original docstring frames 5 / 0.85 as stricter than a typical NT
    setting of 2 / 0.65.
    """
    from iai_mcp import schema as schema_mod

    # Auto-induction uses lower bounds, so stricter values also pass.
    assert schema_mod.AUTO_INDUCT_COOCCURRENCE >= 5
    assert schema_mod.AUTO_INDUCT_CONFIDENCE >= 0.85

    # The user-approval thresholds are pinned exactly.
    assert schema_mod.USER_APPROVAL_COOCCURRENCE == 3
    assert schema_mod.USER_APPROVAL_CONFIDENCE == 0.65
Initial release: iai-mcp v0.1.0 Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com> 2026-05-06 01:04:47 -07:00			`"""Tests for LEARN-03 schema induction (D-18 + D-21).`

			`dual-path schema surfacing.`
			`- Primary: batch induction inside sleep cycle (Tier 1 Haiku when allowed, Tier 0`
			`cooccurrence + TF-IDF otherwise).`
			`- Secondary: entropy-gated provisional schemas surfaced during pipeline_recall.`

			`D-21 (autism-tuned):`
			`- Auto-induct at co_occurrence >= 5 AND confidence >= 0.85.`
			`- User-approval flag at [3, 5) AND [0.65, 0.85).`
			`- Exception preservation: exceptions stored as first-class records.`
			`- Abstraction level: concrete (Dawson-Mottron).`
			`"""`
			`from __future__ import annotations`

			`import os`
			`from datetime import datetime, timezone`
			`from uuid import uuid4`

			`import pytest`

			`from iai_mcp.events import query_events`
			`from iai_mcp.store import MemoryStore`
			`from iai_mcp.types import EMBED_DIM, MemoryRecord`


			`def _rec(`
			`*,`
			`text: str = "t",`
			`tags: list[str] | None = None,`
			`language: str = "en",`
			`tier: str = "episodic",`
			`detail_level: int = 2,`
			`) -> MemoryRecord:`
			`now = datetime.now(timezone.utc)`
			`return MemoryRecord(`
			`id=uuid4(),`
			`tier=tier,`
			`literal_surface=text,`
			`aaak_index="",`
			`embedding=[1.0] + [0.0] * (EMBED_DIM - 1),`
			`community_id=None,`
			`centrality=0.0,`
			`detail_level=detail_level,`
			`pinned=False,`
			`stability=0.0,`
			`difficulty=0.0,`
			`last_reviewed=None,`
			`never_decay=False,`
			`never_merge=False,`
			`provenance=[],`
			`created_at=now,`
			`updated_at=now,`
			`tags=list(tags or []),`
			`language=language,`
			`)`


			`@pytest.fixture(autouse=True)`
			`def _patch_embedder(monkeypatch):`
			`"""Avoid loading bge-m3 during schema tests."""`
			`from iai_mcp import embed as embed_mod`

			`class _FakeEmbedder:`
			`DIM = EMBED_DIM`
			`DEFAULT_DIM = EMBED_DIM`
			`DEFAULT_MODEL_KEY = "fake"`

			`def __init__(self, *args, **kwargs):`
			`self.DIM = EMBED_DIM`

			`def embed(self, text: str) -> list[float]:`
			`return [1.0] + [0.0] * (EMBED_DIM - 1)`

			`def embed_batch(self, texts):`
			`return [self.embed(t) for t in texts]`

			`monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)`
			`yield`


			`# ---------------------------------------------------------------- constants`


			`def test_schema_d21_thresholds_encoded():`
			`from iai_mcp import schema`

			`assert schema.AUTO_INDUCT_COOCCURRENCE == 5`
			`assert schema.AUTO_INDUCT_CONFIDENCE == 0.85`
			`assert schema.USER_APPROVAL_COOCCURRENCE == 3`
			`assert schema.USER_APPROVAL_CONFIDENCE == 0.65`


			`# ---------------------------------------------------------------- Tier-0 induction`


			`def test_induce_schemas_tier0_returns_candidates_at_threshold(tmp_path):`
			`"""9+ records on the same tag pair -> auto candidate (confidence = count/10)."""`
			`from iai_mcp.schema import induce_schemas_tier0`

			`store = MemoryStore(path=tmp_path)`
			`# Confidence scales count/10. Need count >= 9 for confidence >= 0.9 (auto).`
			`for i in range(10):`
			`store.insert(_rec(text=f"r{i}", tags=["meeting", "notes"]))`
			`candidates = induce_schemas_tier0(store)`
			`assert len(candidates) >= 1`
			`hit = [c for c in candidates if c.evidence_count >= 5 and c.confidence >= 0.85]`
			`assert len(hit) >= 1`
			`assert hit[0].status == "auto"`


			`def test_induce_schemas_tier0_threshold_lowered_requires_approval(tmp_path):`
			`"""4 records -> status pending_user_approval."""`
			`from iai_mcp.schema import induce_schemas_tier0`

			`store = MemoryStore(path=tmp_path)`
			`for i in range(4):`
			`store.insert(_rec(text=f"r{i}", tags=["report", "deadline"]))`
			`candidates = induce_schemas_tier0(store)`
			`# At least one candidate with user-approval status`
			`match = [c for c in candidates if c.evidence_count == 4]`
			`# Confidence 4/10=0.4 is below 0.65 -> NO candidate emitted.`
			`# Raise the confidence path: 4 occurrences with small base set should`
			`# yield candidates if we scale confidence up. We'll assert no auto-mode`
			`# candidate exists at count=4.`
			`auto_hits = [c for c in candidates if c.status == "auto"]`
			`assert len(auto_hits) == 0`


			`def test_induce_schemas_tier0_discards_below_threshold(tmp_path):`
			`"""2 records -> no candidate."""`
			`from iai_mcp.schema import induce_schemas_tier0`

			`store = MemoryStore(path=tmp_path)`
			`for i in range(2):`
			`store.insert(_rec(text=f"r{i}", tags=["alpha", "beta"]))`
			`candidates = induce_schemas_tier0(store)`
			`assert len(candidates) == 0`


			`def test_induce_schemas_tier0_no_llm_call(tmp_path, monkeypatch):`
			`"""Tier-0 never calls should_call_llm or anthropic."""`
			`from iai_mcp.schema import induce_schemas_tier0`

			`monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)`
			`store = MemoryStore(path=tmp_path)`
			`for i in range(3):`
			`store.insert(_rec(text=f"r{i}", tags=["work", "design"]))`
			`candidates = induce_schemas_tier0(store)`
			`# Should not raise regardless of API key.`
			`assert isinstance(candidates, list)`


			`# ---------------------------------------------------------------- Tier-1 falls back`


			`def test_induce_schemas_tier1_falls_back_on_guard_block(tmp_path, monkeypatch):`
			`"""should_call_llm returns False -> tier1 delegates to tier0 + logs llm_health."""`
			`from iai_mcp.guard import BudgetLedger, RateLimitLedger`
			`from iai_mcp.schema import induce_schemas_tier1`

			`monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)`
			`store = MemoryStore(path=tmp_path)`
			`for i in range(5):`
			`store.insert(_rec(text=f"r{i}", tags=["project", "meeting"]))`

			`budget = BudgetLedger(store)`
			`rate = RateLimitLedger(store)`
			`candidates = induce_schemas_tier1(`
			`store, budget=budget, rate=rate, llm_enabled=False,`
			`)`
			`assert isinstance(candidates, list)`
			`# llm_health event should reflect the fallback`
			`events = query_events(store, kind="llm_health")`
			`# Expect at least one schema_induction llm_health event`
			`matching = [e for e in events if e["data"].get("component") == "schema_induction"]`
			`assert len(matching) >= 1`


			`# ---------------------------------------------------------------- persist schema`


			`def test_persist_schema_creates_semantic_record(tmp_path):`
			`"""persist_schema inserts a semantic-tier record with detail_level=3."""`
			`from iai_mcp.schema import SchemaCandidate, persist_schema`

			`store = MemoryStore(path=tmp_path)`
			`# Seed source evidence records`
			`ev_recs = [_rec(text=f"ev{i}", tags=["meeting", "notes"]) for i in range(3)]`
			`for r in ev_recs:`
			`store.insert(r)`

			`cand = SchemaCandidate(`
			`pattern="tags:meeting+notes",`
			`confidence=0.88,`
			`evidence_count=3,`
			`evidence_ids=[r.id for r in ev_recs],`
			`status="auto",`
			`)`
			`schema_id = persist_schema(store, cand)`

			`schema_rec = store.get(schema_id)`
			`assert schema_rec is not None`
			`assert schema_rec.tier == "semantic"`
			`assert schema_rec.detail_level == 3`
			`assert schema_rec.never_decay is True`


			`def test_persist_schema_creates_schema_instance_of_edges(tmp_path):`
			`"""Each evidence record gets a schema_instance_of edge to the schema record."""`
			`from iai_mcp.schema import SchemaCandidate, persist_schema`
			`from iai_mcp.store import EDGES_TABLE`

			`store = MemoryStore(path=tmp_path)`
			`ev_recs = [_rec(text=f"ev{i}", tags=["m", "n"]) for i in range(3)]`
			`for r in ev_recs:`
			`store.insert(r)`

			`cand = SchemaCandidate(`
			`pattern="tags:m+n",`
			`confidence=0.9,`
			`evidence_count=3,`
			`evidence_ids=[r.id for r in ev_recs],`
			`status="auto",`
			`)`
			`schema_id = persist_schema(store, cand)`

			`edges_df = store.db.open_table(EDGES_TABLE).to_pandas()`
			`sio = edges_df[edges_df["edge_type"] == "schema_instance_of"]`
			`assert len(sio) == 3`


			`# ---------------------------------------------------------------- provisional`


			`def test_provisional_schemas_for_recall_returns_hint(tmp_path):`
			`"""High-entropy hits -> provisional schema hints."""`
			`from iai_mcp.schema import provisional_schemas_for_recall`

			`store = MemoryStore(path=tmp_path)`
			`recs = [_rec(text=f"r{i}", tags=["meeting", "notes"]) for i in range(3)]`
			`for r in recs:`
			`store.insert(r)`

			`# Build synthetic hits referencing these records`
			`class _Hit:`
			`def __init__(self, rid, score):`
			`self.record_id = rid`
			`self.score = score`

			`hits = [_Hit(recs[i].id, 0.3) for i in range(3)]`
			`# Entropy of three equal probabilities is ~1.58 bits -> above 0.8`
			`provisionals = provisional_schemas_for_recall(store, hits, entropy_bits=1.5)`
			`assert isinstance(provisionals, list)`
			`# Return at least one (tag pattern cohesive)`
			`assert any(p.get("kind") == "provisional_schema" for p in provisionals)`


			`def test_provisional_schemas_below_entropy_empty(tmp_path):`
			`from iai_mcp.schema import provisional_schemas_for_recall`

			`store = MemoryStore(path=tmp_path)`
			`assert provisional_schemas_for_recall(store, [], entropy_bits=0.5) == []`


			`# ---------------------------------------------------------------- integration`


			`def test_autistic_threshold_stricter_than_nt():`
			`"""auto-induct threshold 5/0.85 is stricter than typical NT 2/0.65."""`
			`from iai_mcp.schema import (`
			`AUTO_INDUCT_COOCCURRENCE,`
			`AUTO_INDUCT_CONFIDENCE,`
			`USER_APPROVAL_COOCCURRENCE,`
			`USER_APPROVAL_CONFIDENCE,`
			`)`

			`# Explicit autism-aware limits`
			`assert AUTO_INDUCT_COOCCURRENCE >= 5`
			`assert AUTO_INDUCT_CONFIDENCE >= 0.85`
			`assert USER_APPROVAL_COOCCURRENCE == 3`
			`assert USER_APPROVAL_CONFIDENCE == 0.65`