Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
70
tests/test_bench_ram_regression.py
Normal file
70
tests/test_bench_ram_regression.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""OPS-11 regression guard: small-N RAM bench stays under threshold.
|
||||
|
||||
Plan 05-05 (D5-08) — CI-runnable guard for bench/memory_footprint.py. The
|
||||
large-N target (RSS <= 300 MB at N=10k warm on 16+ GB machine) runs
|
||||
ad-hoc from the published bench report; this test exercises the small-N path
|
||||
(N=100-500 with a 64d embedding) so CI catches harness drift without
|
||||
spinning up a 10k-record LanceDB table.
|
||||
|
||||
See:
|
||||
- bench/memory_footprint.py — the harness under guard
|
||||
- internal architecture spec
|
||||
Task 1 for the behavior contract
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_memory_footprint_small_n_under_threshold(tmp_path: Path):
|
||||
"""Smoke: small-N run populates rss_mb_peak under a generous ceiling.
|
||||
|
||||
The 300 MB large-N target is NOT asserted here — a fresh LanceDB +
|
||||
NetworkX graph at N=500 already allocates more than that on macOS
|
||||
when bge-m3 is loaded via embed import. This guard only asserts that
|
||||
the harness returns a plausible positive reading and respects the
|
||||
JSON schema the BENCH_REPORT consumes.
|
||||
"""
|
||||
from bench.memory_footprint import run_memory_footprint
|
||||
|
||||
out = run_memory_footprint(n=100, store_path=tmp_path / "store", dim=64)
|
||||
|
||||
# Shape: every key promised in the module docstring is present.
|
||||
assert "n" in out
|
||||
assert "rss_mb_peak" in out
|
||||
assert "threshold_mb" in out
|
||||
assert "passed" in out
|
||||
assert "platform" in out
|
||||
|
||||
# Values: rss is a real positive reading; threshold is the design target.
|
||||
assert out["n"] == 100
|
||||
assert isinstance(out["rss_mb_peak"], float)
|
||||
assert out["rss_mb_peak"] > 0.0
|
||||
assert out["threshold_mb"] == 300.0
|
||||
|
||||
# Generous outer bound — catches a clearly broken reading (e.g. reporting
|
||||
# nanoseconds as MB). The tight 300 MB fence belongs to the large-N run.
|
||||
assert out["rss_mb_peak"] < 4000.0, (
|
||||
f"small-N RSS {out['rss_mb_peak']} MB suspicious"
|
||||
)
|
||||
|
||||
|
||||
def test_memory_footprint_main_exits_int(tmp_path: Path):
|
||||
"""CLI entry-point returns 0 or 1 (bench CI contract)."""
|
||||
from bench import memory_footprint
|
||||
|
||||
code = memory_footprint.main(argv=["--n", "50", "--dim", "32"])
|
||||
assert code in (0, 1)
|
||||
|
||||
|
||||
def test_memory_footprint_platform_units_documented(tmp_path: Path):
|
||||
"""Harness records the platform it measured on — macOS bytes vs Linux KB
|
||||
is an correctness trap; the JSON output must carry the marker so
|
||||
downstream reports can reproduce the unit conversion.
|
||||
"""
|
||||
from bench.memory_footprint import run_memory_footprint
|
||||
|
||||
out = run_memory_footprint(n=50, store_path=tmp_path / "store2", dim=32)
|
||||
assert out["platform"] in ("darwin", "linux", "win32")
|
||||
Loading…
Add table
Add a link
Reference in a new issue