Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
105
tests/test_bench_trajectory.py
Normal file
105
tests/test_bench_trajectory.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Tests for bench/trajectory.py (Plan 02-04 Task 4, D-33).
|
||||
|
||||
D-33 (benchmark corpus): 30-session synthetic corpus (autism/NT interaction
|
||||
pattern models), reproducible from seed=42. Diverse-language fixture:
|
||||
corpus includes English + Russian + Japanese + Arabic + German records for
|
||||
corpus-shape variance testing — NOT a multilingual product mandate. Brain
|
||||
is English-only since Plan 05-08 (default bge-small-en-v1.5).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_synthetic_corpus_generates_30_sessions():
    """Ask for 30 sessions and check count plus the per-session keys.

    Every generated session must expose ``session_id``, ``records``,
    ``curiosity_events`` and ``trajectory_metrics``.
    """
    from bench.trajectory import generate_synthetic_corpus

    required_keys = (
        "session_id",
        "records",
        "curiosity_events",
        "trajectory_metrics",
    )

    sessions = generate_synthetic_corpus(n_sessions=30, seed=42)

    assert len(sessions) == 30
    for session in sessions:
        # Keys are checked in a fixed order so the first missing one fails.
        for key in required_keys:
            assert key in session
|
||||
|
||||
|
||||
def test_synthetic_corpus_deterministic_from_seed():
    """Two generations with the same seed produce the same session ids.

    Only the ordered list of ``session_id`` values is compared; the other
    session fields are not inspected by this test.
    """
    from bench.trajectory import generate_synthetic_corpus

    def session_ids(corpus):
        # Preserve corpus order so a reordering also counts as a mismatch.
        return [session["session_id"] for session in corpus]

    first_run = generate_synthetic_corpus(n_sessions=5, seed=42)
    second_run = generate_synthetic_corpus(n_sessions=5, seed=42)

    assert session_ids(first_run) == session_ids(second_run)
|
||||
|
||||
|
||||
def test_synthetic_corpus_multilingual():
    """Check that the synthetic corpus mixes English with other languages.

    This is a corpus-shape variance fixture, NOT a product mandate: the
    IAI-MCP brain is English-only since Plan 05-08. The non-English samples
    exist only to exercise corpus-shape variance in trajectory aggregation.
    """
    from bench.trajectory import generate_synthetic_corpus

    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)

    # A record without a "language" key is counted as English.
    languages: set[str] = {
        record.get("language", "en")
        for session in corpus
        for record in session["records"]
    }

    # Minimum bar: English plus one other language (ru/ja/ar/de).
    assert "en" in languages
    non_english = languages - {"en"}
    assert len(non_english) >= 1, (
        f"diverse-language fixture has only languages={languages}"
    )

    # Stronger bar: four or more distinct languages across the 30 sessions
    # (corpus-shape diversity, not a multilingual product claim).
    assert len(languages) >= 4
|
||||
|
||||
|
||||
def test_synthetic_corpus_covers_six_metrics():
    """The corpus as a whole carries all six metric slots, m1 through m6.

    The check is made on the union of ``trajectory_metrics`` keys across
    every session, so a metric only has to appear in some session to count.
    """
    from bench.trajectory import generate_synthetic_corpus

    expected = {"m1", "m2", "m3", "m4", "m5", "m6"}
    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)

    seen: set[str] = set()
    for session in corpus:
        # Iterating the metrics container yields its keys.
        seen.update(session["trajectory_metrics"])

    # Superset check: extra metric keys beyond the six are allowed.
    assert seen >= expected
|
||||
|
||||
|
||||
def test_trajectory_bench_runs_over_corpus(tmp_path):
    """Run the bench over a six-session corpus and check the result keys.

    The result must contain one ``mN_trend`` entry for each of the six
    metrics plus a ``passed`` entry. Values are not inspected.
    """
    from bench.trajectory import generate_synthetic_corpus, run_trajectory_bench

    expected_keys = (
        "m1_trend",
        "m2_trend",
        "m3_trend",
        "m4_trend",
        "m5_trend",
        "m6_trend",
        "passed",
    )

    sessions = generate_synthetic_corpus(n_sessions=6, seed=42)
    result = run_trajectory_bench(sessions, store_path=tmp_path)

    for key in expected_keys:
        assert key in result
|
||||
|
||||
|
||||
def test_trajectory_bench_main_runs(tmp_path, capsys):
    """Invoke ``main`` on a tiny run and accept either exit code 0 or 1.

    ``capsys`` is requested as a fixture but its captured output is not
    read by this test.
    """
    from bench.trajectory import main

    acceptable_codes = (0, 1)

    # No real-logs argument is passed, so main uses its default (synthetic)
    # input; n_sessions is kept small for CI speed.
    exit_code = main(n_sessions=5, store_path=tmp_path)

    assert exit_code in acceptable_codes
|
||||
|
||||
|
||||
def test_trajectory_bench_accepts_real_logs_flag(tmp_path):
    """``main`` accepts a ``real_logs_path`` argument (CLI: --real-logs=path).

    Here the path is ``None``, i.e. absent, which is expected to fall back
    to the synthetic corpus. Only the exit code (0 or 1) is asserted; the
    fallback itself is not inspected.
    """
    from bench.trajectory import main

    acceptable_codes = (0, 1)

    # Missing path -> synthetic fallback.
    exit_code = main(n_sessions=3, real_logs_path=None, store_path=tmp_path)

    assert exit_code in acceptable_codes
|
||||
Loading…
Add table
Add a link
Reference in a new issue