232 lines
7.5 KiB
Python
232 lines
7.5 KiB
Python
|
|
"""Tests for the v1 -> v2 migration (``migrate_v1_to_v2``).
|
|||
|
|
|
|||
|
|
Strategy: the new records table already accepts schema_version=1 rows via
|
|||
|
|
the back-compat read path. We seed a store with v1 records (schema_version=1,
|
|||
|
|
blank language, current-dim embedding) and assert migrate_v1_to_v2:
|
|||
|
|
- Backfills language via langdetect
|
|||
|
|
- Re-embeds with the configured embedder (bge-m3 by default)
|
|||
|
|
- Sets s5_trust_score=0.5 and profile_modulation_gain={}
|
|||
|
|
- Bumps schema_version=2
|
|||
|
|
- Emits a migration_v1_to_v2 event
|
|||
|
|
- Is idempotent
|
|||
|
|
- Preserves literal_surface byte-for-byte
|
|||
|
|
|
|||
|
|
Because bge-m3 is 1024d and the store in these tests is 1024d by default,
|
|||
|
|
re-embedding keeps the same dim. We use IAI_MCP_EMBED_MODEL=bge-small-en-v1.5
|
|||
|
|
in a few tests where dim delta is not the property under test -- the
|
|||
|
|
migration still re-embeds, just to a 384d target.
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
from uuid import UUID, uuid4
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
from iai_mcp.types import EMBED_DIM, MemoryRecord, SCHEMA_VERSION_LEGACY
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _v1_record(
    text: str,
    *,
    language: str = "",
    tags: list[str] | None = None,
    dim: int = EMBED_DIM,
) -> MemoryRecord:
    """Build a MemoryRecord that imitates a legacy (schema v1) row.

    The record is first created with ``language="en"`` because, per the
    original note on this helper, ``MemoryRecord.__post_init__`` rejects an
    empty language. The attribute is then overwritten after construction,
    so the returned object carries the caller's ``language`` (an empty
    string by default, which is the simulated-v1 state).

    Args:
        text: Stored verbatim as ``literal_surface``.
        language: Language tag to leave on the record; ``""`` means "legacy,
            not yet detected".
        tags: Optional tags; copied into a fresh list.
        dim: Length of the constant placeholder embedding.

    Returns:
        A record with ``schema_version=SCHEMA_VERSION_LEGACY``.
    """
    placeholder_embedding = [0.1] * dim
    seed_provenance = [
        {"ts": "2026-04-16T00:00:00Z", "cue": "seed", "session_id": "phase1"}
    ]

    record = MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=placeholder_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=seed_provenance,
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[] if not tags else list(tags),
        language="en",  # placeholder so construction-time validation passes
        schema_version=SCHEMA_VERSION_LEGACY,
    )

    # Swap the placeholder for the requested value after construction;
    # a falsy argument leaves the legacy-looking empty language.
    record.language = language or ""
    return record
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------- core migration
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_v1_to_v2_sets_defaults(tmp_path):
    """Migration fills the new-schema defaults and lifts the record off v1."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import SCHEMA_VERSION_CURRENT

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English legacy record for migration test with enough words")
    store.insert(legacy)

    summary = migrate_v1_to_v2(store)
    assert summary["records_migrated"] >= 1

    migrated = store.get(legacy.id)
    assert migrated is not None

    # Defaults introduced by the v2 schema.
    assert migrated.s5_trust_score == 0.5
    assert migrated.profile_modulation_gain == {}

    # SCHEMA_VERSION_CURRENT has since moved from 2 to 4 (TEM factorization).
    # The migration writes whatever the current default is, so the property
    # that matters here is "no longer v1".
    assert migrated.schema_version == SCHEMA_VERSION_CURRENT
    assert migrated.schema_version >= 2
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_v1_to_v2_detects_language(tmp_path):
    """Migration backfills the blank language of v1 records by detection.

    One English and one Russian record are seeded with an empty language;
    after migration each must carry the matching language code.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    en = _v1_record("This is a reasonable English sentence with enough words for detection.")
    ru = _v1_record("Это осмысленное предложение на русском языке с достаточным количеством слов.")
    store.insert(en)
    store.insert(ru)

    migrate_v1_to_v2(store)

    en_mig = store.get(en.id)
    ru_mig = store.get(ru.id)

    # Guard the lookups first so a lost record fails with a readable message
    # instead of an AttributeError on None.
    assert en_mig is not None, "English record missing after migration"
    assert ru_mig is not None, "Russian record missing after migration"

    # NOTE(review): the module docstring says detection uses langdetect, which
    # is non-deterministic unless seeded; presumably the migration seeds it --
    # confirm if this test ever flakes.
    assert en_mig.language == "en"
    assert ru_mig.language == "ru"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_v1_to_v2_idempotent(tmp_path):
    """A second migration pass over an already-migrated store migrates nothing."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    seed_count = 5
    for i in range(seed_count):
        legacy = _v1_record(f"English record number {i} with enough content to detect.")
        store.insert(legacy)

    # First pass picks up every seeded v1 record.
    first_pass = migrate_v1_to_v2(store)
    assert first_pass["records_migrated"] >= 5

    # Second pass: nothing is left at v1, so the count must be zero.
    second_pass = migrate_v1_to_v2(store)
    assert second_pass["records_migrated"] == 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_dry_run_no_writes(tmp_path):
    """With ``dry_run=True`` the migration reports work but leaves the store alone."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("Dry run English text with enough words for language detection.")
    store.insert(legacy)

    # Sanity check: the stored row really starts out at schema v1.
    stored_before = store.get(legacy.id)
    assert stored_before.schema_version == 1

    report = migrate_v1_to_v2(store, dry_run=True)
    assert report["records_migrated"] >= 1

    # The dry run must not have touched the persisted record.
    stored_after = store.get(legacy.id)
    assert stored_after.schema_version == 1  # unchanged
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_writes_event(tmp_path):
    """One migration run records exactly one ``migration_v1_to_v2`` event."""
    from iai_mcp.events import query_events
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English content one for migration event test.")
    store.insert(legacy)

    migrate_v1_to_v2(store)

    recorded = query_events(store, kind="migration_v1_to_v2")
    assert len(recorded) == 1

    # The event payload reports how many records the run covered.
    payload = recorded[0]["data"]
    assert payload["record_count"] >= 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_preserves_literal_surface_verbatim(tmp_path):
    """MEM-01 constitutional: migration MUST NOT rewrite literal_surface."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    verbatim = "SECRET_PHRASE_ABC_XYZ must survive the migration byte-for-byte exactly."

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record(verbatim)
    store.insert(legacy)

    migrate_v1_to_v2(store)

    # The stored text must compare equal to what was seeded.
    after = store.get(legacy.id)
    assert after.literal_surface == verbatim
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_preserves_provenance(tmp_path):
    """The seeded provenance entry is still the only one after migration."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English content for provenance preservation test through migration.")
    store.insert(legacy)

    migrate_v1_to_v2(store)

    after = store.get(legacy.id)
    trail = after.provenance

    # Exactly one entry, with the cue and session id that _v1_record seeded.
    assert len(trail) == 1
    assert trail[0]["cue"] == "seed"
    assert trail[0]["session_id"] == "phase1"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_skips_existing_v2_records(tmp_path):
    """Mixed store: v1 records migrate, v2 records are skipped."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # An already-migrated record: built with the legacy helper (language set),
    # then explicitly stamped schema_version=2 before it is stored.
    v2 = _v1_record("Already migrated record with language tag.", language="en")
    v2.schema_version = 2
    store.insert(v2)

    # A genuine v1 record (blank language, legacy schema version).
    v1 = _v1_record("Legacy v1 record with enough content for detection.")
    store.insert(v1)

    result = migrate_v1_to_v2(store)

    # Only the v1 record should be counted as migrated.
    assert result["records_migrated"] == 1

    # The v2 record is still present and keeps its schema version.
    v2_got = store.get(v2.id)
    assert v2_got is not None, "v2 record missing after migration"
    assert v2_got.schema_version == 2

    # The v1 record is the one that was upgraded: it must have left the
    # legacy schema version, so the count above cannot refer to the v2 row.
    v1_got = store.get(v1.id)
    assert v1_got is not None, "v1 record missing after migration"
    assert v1_got.schema_version != SCHEMA_VERSION_LEGACY
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_migrate_result_carries_model_info(tmp_path):
    """The migration result exposes the model names and the run duration."""
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English content for the migration model info check.")
    store.insert(legacy)

    report = migrate_v1_to_v2(store)

    # Only the presence of each key is checked, not its value.
    for expected_key in ("previous_model", "new_model", "duration_sec"):
        assert expected_key in report
|