Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
231 lines
7.5 KiB
Python
231 lines
7.5 KiB
Python
"""Tests for the v1 -> v2 migration (migrate_v1_to_v2).
|
||
|
||
Strategy: the new records table already accepts schema_version=1 rows via
|
||
the back-compat read path. We seed a store with v1 records (schema_version=1,
|
||
blank language, current-dim embedding) and assert migrate_v1_to_v2:
|
||
- Backfills language via langdetect
|
||
- Re-embeds with the configured embedder (bge-m3 by default)
|
||
- Sets s5_trust_score=0.5 and profile_modulation_gain={}
|
||
- Bumps schema_version=2
|
||
- Emits a migration_v1_to_v2 event
|
||
- Is idempotent
|
||
- Preserves literal_surface byte-for-byte
|
||
|
||
Because bge-m3 is 1024d and the store in these tests is 1024d by default,
|
||
re-embedding keeps the same dim. We use IAI_MCP_EMBED_MODEL=bge-small-en-v1.5
|
||
in a few tests where dim delta is not the property under test -- the
|
||
migration still re-embeds, just to a 384d target.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timezone
|
||
from uuid import UUID, uuid4
|
||
|
||
import pytest
|
||
|
||
from iai_mcp.types import EMBED_DIM, MemoryRecord, SCHEMA_VERSION_LEGACY
|
||
|
||
|
||
def _v1_record(
    text: str,
    *,
    language: str = "",
    tags: list[str] | None = None,
    dim: int = EMBED_DIM,
) -> MemoryRecord:
    """Build a MemoryRecord that imitates a legacy (v1) row.

    The record is first constructed with ``language="en"`` -- per the
    original author's note, ``__post_init__`` rejects an empty language --
    and the requested ``language`` (blank by default) is then written over
    it by plain attribute assignment to reproduce the legacy state.

    Args:
        text: Stored unchanged as ``literal_surface``.
        language: Final value of ``record.language``; the default ``""``
            simulates a row with no language recorded.
        tags: Optional tags, copied into a new list; ``None`` or an empty
            list yields ``[]``.
        dim: Length of the constant ``0.1`` placeholder embedding.

    Returns:
        The record, carrying ``schema_version=SCHEMA_VERSION_LEGACY``.
    """
    seed_provenance = [
        {"ts": "2026-04-16T00:00:00Z", "cue": "seed", "session_id": "phase1"}
    ]
    record = MemoryRecord(
        # Identity and content.
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * dim,
        # Graph placement.
        community_id=None,
        centrality=0.0,
        detail_level=2,
        # Retention / scheduling knobs, all at neutral values.
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        # Bookkeeping.
        provenance=seed_provenance,
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=list(tags or []),
        language="en",  # placeholder so construction-time validation passes
        schema_version=SCHEMA_VERSION_LEGACY,
    )
    # Overwrite the placeholder after construction; a falsy request collapses
    # to the empty string, i.e. the legacy "no language recorded" state.
    record.language = language or ""
    return record
|
||
|
||
|
||
# --------------------------------------------------------- core migration
|
||
|
||
|
||
def test_migrate_v1_to_v2_sets_defaults(tmp_path):
    """Check the default fields and schema version written by the migration.

    Seeds one legacy record, runs ``migrate_v1_to_v2`` and asserts that the
    stored record has ``s5_trust_score == 0.5``, an empty
    ``profile_modulation_gain`` and a schema version equal to
    ``SCHEMA_VERSION_CURRENT`` (and at least 2).
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import SCHEMA_VERSION_CURRENT

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English legacy record for migration test with enough words")
    store.insert(legacy)

    summary = migrate_v1_to_v2(store)
    assert summary["records_migrated"] >= 1

    migrated = store.get(legacy.id)
    assert migrated is not None
    assert migrated.s5_trust_score == 0.5
    assert migrated.profile_modulation_gain == {}

    # Original note: SCHEMA_VERSION_CURRENT was bumped from 2 -> 4 (TEM
    # factorization) and migrate_v1_to_v2 still writes the current default,
    # so the property that matters here is "no longer v1".
    assert migrated.schema_version == SCHEMA_VERSION_CURRENT
    assert migrated.schema_version >= 2
|
||
|
||
|
||
def test_migrate_v1_to_v2_detects_language(tmp_path):
    """Check that migration fills in ``language`` for blank-language records.

    One English and one Russian legacy record are seeded; after migration
    their stored ``language`` values must be ``"en"`` and ``"ru"``.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Expected language code -> legacy record seeded with blank language.
    samples = {
        "en": _v1_record(
            "This is a reasonable English sentence with enough words for detection."
        ),
        "ru": _v1_record(
            "Это осмысленное предложение на русском языке с достаточным количеством слов."
        ),
    }
    for seeded in samples.values():
        store.insert(seeded)

    migrate_v1_to_v2(store)

    for expected_language, seeded in samples.items():
        assert store.get(seeded.id).language == expected_language
|
||
|
||
|
||
def test_migrate_v1_to_v2_idempotent(tmp_path):
    """Check that a second migration run reports zero migrated records.

    Five legacy records are seeded. The first run must report at least five
    migrated records; the second run must report exactly zero.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    seed_count = 5
    for index in range(seed_count):
        body = f"English record number {index} with enough content to detect."
        store.insert(_v1_record(body))

    initial_run = migrate_v1_to_v2(store)
    assert initial_run["records_migrated"] >= 5

    # Nothing legacy should remain after the first pass, so a repeat run
    # has no work to report.
    repeat_run = migrate_v1_to_v2(store)
    assert repeat_run["records_migrated"] == 0
|
||
|
||
|
||
def test_migrate_dry_run_no_writes(tmp_path):
    """Check that ``dry_run=True`` reports work but leaves the record at v1.

    The seeded record must read back with ``schema_version == 1`` both
    before and after the dry run, while the result still reports at least
    one record under ``records_migrated``.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("Dry run English text with enough words for language detection.")
    store.insert(legacy)

    # Baseline: the row is stored as v1.
    stored_before = store.get(legacy.id)
    assert stored_before.schema_version == 1

    report = migrate_v1_to_v2(store, dry_run=True)
    assert report["records_migrated"] >= 1

    # The dry run must not have written the version bump back to the store.
    stored_after = store.get(legacy.id)
    assert stored_after.schema_version == 1  # unchanged
|
||
|
||
|
||
def test_migrate_writes_event(tmp_path):
    """Check that one ``migration_v1_to_v2`` event is recorded per run.

    After a single migration over one seeded record, querying events of
    kind ``"migration_v1_to_v2"`` must return exactly one event whose
    ``data["record_count"]`` is at least 1.
    """
    from iai_mcp.events import query_events
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    seeded = _v1_record("English content one for migration event test.")
    store.insert(seeded)

    migrate_v1_to_v2(store)

    migration_events = query_events(store, kind="migration_v1_to_v2")
    assert len(migration_events) == 1

    payload = migration_events[0]["data"]
    assert payload["record_count"] >= 1
|
||
|
||
|
||
def test_migrate_preserves_literal_surface_verbatim(tmp_path):
    """MEM-01 constitutional: migration MUST NOT rewrite literal_surface.

    The text stored before migration must compare equal to the
    ``literal_surface`` read back afterwards.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    original_text = "SECRET_PHRASE_ABC_XYZ must survive the migration byte-for-byte exactly."

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record(original_text)
    store.insert(legacy)

    migrate_v1_to_v2(store)

    stored = store.get(legacy.id)
    assert stored.literal_surface == original_text
|
||
|
||
|
||
def test_migrate_preserves_provenance(tmp_path):
    """Check that the seeded provenance entry is still present after migration.

    The helper seeds exactly one provenance entry (cue ``"seed"``, session
    ``"phase1"``); after migration the record must still hold exactly that
    one entry with the same ``cue`` and ``session_id``.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    legacy = _v1_record("English content for provenance preservation test through migration.")
    store.insert(legacy)

    migrate_v1_to_v2(store)

    trail = store.get(legacy.id).provenance
    assert len(trail) == 1

    seed_entry = trail[0]
    assert seed_entry["cue"] == "seed"
    assert seed_entry["session_id"] == "phase1"
|
||
|
||
|
||
def test_migrate_skips_existing_v2_records(tmp_path):
    """Mixed store: v1 records migrate, v2 records are skipped.

    One record is inserted at ``schema_version=2`` and one at the legacy
    version. The migration must report exactly one migrated record, and the
    version-2 record must still read back with ``schema_version == 2``.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Already-migrated record: built with the legacy helper (language set
    # explicitly), then its schema_version is overridden to 2 before insert.
    already_v2 = _v1_record("Already migrated record with language tag.", language="en")
    already_v2.schema_version = 2
    store.insert(already_v2)

    # Genuine legacy record: blank language, legacy schema version.
    legacy = _v1_record("Legacy v1 record with enough content for detection.")
    store.insert(legacy)

    summary = migrate_v1_to_v2(store)

    # Exactly one of the two rows qualifies for migration.
    assert summary["records_migrated"] == 1

    # The version-2 row keeps its schema version.
    untouched = store.get(already_v2.id)
    assert untouched.schema_version == 2
|
||
|
||
|
||
def test_migrate_result_carries_model_info(tmp_path):
    """Check that the migration result exposes model and timing keys.

    The result of ``migrate_v1_to_v2`` must contain ``"previous_model"``,
    ``"new_model"`` and ``"duration_sec"``; only key presence is asserted.
    """
    from iai_mcp.migrate import migrate_v1_to_v2
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    seeded = _v1_record("English content for the migration model info check.")
    store.insert(seeded)

    summary = migrate_v1_to_v2(store)

    for required_key in ("previous_model", "new_model", "duration_sec"):
        assert required_key in summary
|