iai-mcp-opencode/tests/test_aaak.py
Areg Noya f6b876fbe7 Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
2026-05-06 01:04:47 -07:00

189 lines
5.6 KiB
Python

"""Tests for the AAAK index generator + English-raw enforcement (D-08, TOK-10).
D-08 constitutional rule:
- Storage is RAW VERBATIM English always.
- AAAK is a RETRIEVAL VIEW only: wing/room/entities/tags metadata string.
- The index MUST NOT contain literal_surface content.
TOK-10:
- Non-English literal_surface must be flagged with a `raw:<lang>` tag; unflagged
non-English content raises ValueError at write time via enforce_english_raw.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.aaak import (
enforce_english_raw,
generate_aaak_index,
parse_aaak_index,
)
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _make(
    tier: str = "episodic",
    text: str = "hello world",
    tags: list[str] | None = None,
    community_id: UUID | None = None,
    language: str = "en",
) -> MemoryRecord:
    """Build a ``MemoryRecord`` fixture for the AAAK tests.

    Only the fields these tests vary are parameters; every other field is
    pinned to a fixed neutral value below.

    Args:
        tier: Value passed through as the record's ``tier``.
        text: Value stored as ``literal_surface``.
        tags: Tags for the record; ``None`` or an empty list yields ``[]``.
        community_id: Passed through unchanged as ``community_id``.
        language: Passed through unchanged as ``language``.

    Returns:
        A new ``MemoryRecord`` with a random id and an empty ``aaak_index``.
    """
    record_fields = dict(
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        # Constant vector of the configured width; the values are irrelevant here.
        embedding=[0.1] * EMBED_DIM,
        community_id=community_id,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        # Two separate clock reads, so the stamps are not guaranteed identical.
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        # Copy so the record never aliases the caller's list.
        tags=list(tags) if tags else [],
        language=language,
    )
    return MemoryRecord(**record_fields)
# ------------------------------------------------ generate_aaak_index format
def test_aaak_index_has_exactly_three_slashes():
    """The W:/R:/E:/T: layout of a default record uses exactly three '/' separators."""
    index = generate_aaak_index(_make())
    separator_count = index.count("/")
    assert separator_count == 3
def test_aaak_index_starts_with_wing_marker():
    """A semantic-tier record's index opens with the wing segment ``W:S/``."""
    index = generate_aaak_index(_make(tier="semantic"))
    assert index.startswith("W:S/")
def test_aaak_index_has_four_key_value_segments():
    """Splitting on '/' gives four segments prefixed W:, R:, E:, T: in that order."""
    record = _make(tier="episodic", tags=["entity:Alice", "project", "raw:en"])
    segments = generate_aaak_index(record).split("/")
    # Check the count first so the pairwise prefix check below covers every segment.
    assert len(segments) == 4
    for segment, prefix in zip(segments, ("W:", "R:", "E:", "T:")):
        assert segment.startswith(prefix)
def test_aaak_index_includes_entity_tag_stripped():
    """Entity tags appear in the E: segment with their ``entity:`` prefix removed."""
    r = _make(tags=["entity:Alice", "entity:IAI-MCP", "project"])
    idx = generate_aaak_index(r)
    # Isolate the E: segment alone. Everything after "/E:" would also include
    # the trailing T: segment, letting tag content satisfy the entity checks.
    entities_segment = idx.split("/E:")[1].split("/")[0]
    # Both entity names are present in the entities segment.
    assert "Alice" in entities_segment
    assert "IAI-MCP" in entities_segment
    # Pin the "stripped" half of the contract named by this test.
    assert "entity:" not in entities_segment
def test_aaak_index_deterministic():
    """Generating the index twice from the same record yields equal strings."""
    record = _make(tags=["entity:X", "flag"])
    first = generate_aaak_index(record)
    second = generate_aaak_index(record)
    assert first == second
# -------------------------------------------------------------- no-leak
def test_aaak_index_does_not_contain_literal_surface():
    """Constitutional rule: no part of literal_surface may leak into the index."""
    surface = "Alice mentioned the SECRET_PASSWORD_ABC_XYZ on day 3"
    record = _make(text=surface, tags=["entity:Alice", "project"])
    index = generate_aaak_index(record)
    # Neither the whole sentence nor its distinctive token may show up.
    for leaked in (surface, "SECRET_PASSWORD_ABC_XYZ"):
        assert leaked not in index
def test_aaak_index_unknown_community_marker():
    """A record with ``community_id=None`` gets the room marker ``R:unknown``."""
    index = generate_aaak_index(_make(community_id=None))
    assert "R:unknown" in index
def test_aaak_index_dash_when_no_entities():
    """With no ``entity:`` tags the entities segment is the placeholder ``E:-``."""
    record = _make(tags=["project"])
    index = generate_aaak_index(record)
    # The slashes on both sides anchor the match to the whole E: segment.
    assert "/E:-/" in index
# -------------------------------------------------------- parse round-trip
def test_parse_aaak_index_round_trips_entities_and_tags():
    """Parsing a generated index recovers the wing, entity list and tag set."""
    record = _make(
        tier="semantic",
        tags=["entity:Alice", "entity:IAI", "project", "urgent"],
    )
    fields = parse_aaak_index(generate_aaak_index(record))
    assert fields["wing"] == ["S"]
    assert fields["entities"] == ["Alice", "IAI"]
    # Tags are compared as a set, so their order is not asserted.
    assert set(fields["tags"]) == {"project", "urgent"}
def test_parse_aaak_dash_segments_become_empty_lists():
    """A record with no tags parses back to empty entity and tag lists."""
    fields = parse_aaak_index(generate_aaak_index(_make(tags=[])))
    for key in ("entities", "tags"):
        assert fields[key] == []
# ------------------------------------------ TOK-10 English-raw enforcement
def test_enforce_english_raw_accepts_pure_english():
    """Plain English text passes enforcement; the test fails only if it raises."""
    record = _make(text="Alice said the IAI-MCP project is go")
    enforce_english_raw(record)
def test_enforce_english_raw_rejects_cyrillic_without_tag():
    """Cyrillic text with no ``raw:<lang>`` tag raises a 'constitutional' ValueError."""
    record = _make(text="Alice said: пусть сохранится точно", tags=["project"])
    with pytest.raises(ValueError) as excinfo:
        enforce_english_raw(record)
    # Inspect the message after the context manager has captured the exception.
    message = str(excinfo.value)
    assert "constitutional" in message
def test_enforce_english_raw_accepts_cyrillic_with_raw_tag():
    """The same Cyrillic text is accepted once the record carries ``raw:ru``."""
    declared_tags = ["raw:ru", "project"]
    record = _make(text="Alice said: пусть сохранится точно", tags=declared_tags)
    # The explicit raw:ru declaration satisfies the rule, so nothing is raised.
    enforce_english_raw(record)
def test_enforce_english_raw_rejects_cjk_without_tag():
    """Text containing CJK ideographs and no tags raises ValueError."""
    record = _make(text="Hello 世界 verbatim", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
def test_enforce_english_raw_rejects_hiragana_without_tag():
    """Text containing hiragana and no tags raises ValueError."""
    record = _make(text="Hello こんにちは world", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
def test_enforce_english_raw_accepts_cjk_with_raw_tag():
    """CJK text is accepted when the record carries a ``raw:zh`` tag."""
    record = _make(text="Hello 世界", tags=["raw:zh"])
    # Passing means no exception was raised.
    enforce_english_raw(record)
def test_enforce_english_raw_empty_text_passes():
    """An empty literal_surface passes enforcement without raising."""
    record = _make(text="")
    enforce_english_raw(record)