Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
117
tests/test_bench_total_session_cost.py
Normal file
117
tests/test_bench_total_session_cost.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
"""OPS-12 regression guard: 3-turn sanity for total_session_cost.
|
||||
|
||||
Plan 05-05 (D5-08) — CI-runnable guard for bench/total_session_cost.py.
|
||||
The full 10-turn script runs ad-hoc on this dev Mac and populates
|
||||
the published bench report rows; this test exercises the shape
|
||||
contracts and the minimal-vs-standard invariant at CI speed.
|
||||
|
||||
Acceptance contracts:
|
||||
- minimal total <= standard total (TOK-11 sanity; if not, Plan 05-03
|
||||
regressed somewhere)
|
||||
- per_turn list has exactly 10 entries (fixed D5-08 script)
|
||||
- counter mode honest-disclosed in JSON (anthropic-count-tokens |
|
||||
tiktoken-cl100k-proxy | heuristic-char4)
|
||||
- reference-gate failure flips passed=False
|
||||
|
||||
See:
|
||||
- bench/total_session_cost.py — the harness under guard
|
||||
- bench/tokens.py — 3-tier counter fallback pattern reused here
|
||||
- internal architecture spec
|
||||
Task 3 for the behavior contract
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_total_session_cost_reports_per_turn():
    """Check the shape of a minimal-depth run's result mapping.

    The result must carry a ``per_turn`` list of exactly ten entries whose
    sum equals ``total_tokens``, and must echo back the adapter name and
    the wake depth that was requested.
    """
    from bench.total_session_cost import run_total_session_cost

    out = run_total_session_cost(wake_depth="minimal")

    # Shape first: the per-turn breakdown has to exist and be a list
    # before its length or sum mean anything.
    assert "per_turn" in out
    per_turn = out["per_turn"]
    assert isinstance(per_turn, list)
    assert len(per_turn) == 10, (
        f"D5-08 script has 10 turns; got {len(out['per_turn'])}"
    )

    # The headline total must be exactly the sum of the breakdown.
    assert out["total_tokens"] == sum(per_turn)

    # Identification fields are echoed verbatim.
    assert out["adapter"] == "iai-mcp"
    assert out["wake_depth"] == "minimal"
|
||||
|
||||
|
||||
def test_total_session_cost_minimal_le_standard():
    """Check that a minimal-depth session never out-costs a standard one.

    Both runs go through the same harness entry point and differ only in
    the ``wake_depth`` argument. A failure is labelled as a TOK-11
    regression in the assertion message.
    """
    from bench.total_session_cost import run_total_session_cost

    # Run minimal first, then standard (same order as the comparison).
    minimal, standard = [
        run_total_session_cost(wake_depth=depth)
        for depth in ("minimal", "standard")
    ]

    minimal_total = minimal["total_tokens"]
    standard_total = standard["total_tokens"]
    assert minimal_total <= standard_total, (
        f"minimal {minimal['total_tokens']} > standard {standard['total_tokens']}"
        " — TOK-11 regression"
    )
|
||||
|
||||
|
||||
def test_total_session_cost_counter_mode_disclosed():
    """Check that the result names the token counter that produced it.

    The ``mode`` field must be one of the known counter labels so that
    downstream reports can flag numbers that did not come from the
    official counter.
    """
    from bench.total_session_cost import run_total_session_cost

    known_modes = (
        "anthropic-count-tokens",
        "tiktoken-cl100k-proxy",
        "heuristic-char4",
        "injected",
    )

    reported_mode = run_total_session_cost(wake_depth="minimal")["mode"]
    assert reported_mode in known_modes
|
||||
|
||||
|
||||
def test_total_session_cost_fails_when_above_ref():
    """Check that a lower reference number flips the gate to failed.

    The reference is set to 1, an impossibly low value, so the outcome
    does not depend on the host running the test. The supplied reference
    must also be echoed back under ``refs["mempalace"]``.
    """
    from bench.total_session_cost import run_total_session_cost

    impossibly_low_ref = 1
    result = run_total_session_cost(
        wake_depth="standard",
        mempalace_ref=impossibly_low_ref,
    )

    assert result["passed"] is False
    assert result["refs"]["mempalace"] == 1
|
||||
|
||||
|
||||
def test_total_session_cost_passes_without_refs():
    """Check the degenerate case where no reference numbers are supplied.

    With nothing to compare against, the run must report ``passed`` as
    ``True`` and an empty ``refs`` mapping. Disclosure that references
    were absent is left to the report prose.
    """
    from bench.total_session_cost import run_total_session_cost

    result = run_total_session_cost(wake_depth="minimal")

    assert result["passed"] is True
    assert result["refs"] == {}
|
||||
|
||||
|
||||
def test_total_session_cost_main_exits_int():
    """Check that the CLI entry point returns an exit status of 0 or 1."""
    from bench import total_session_cost

    cli_args = ["--wake-depth", "minimal"]
    exit_code = total_session_cost.main(argv=cli_args)

    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_total_session_cost_injected_counter():
    """Check that a caller-supplied token counter is honoured.

    Passing a deterministic counting function keeps this test independent
    of drift in the proxy tokeniser. The run must label itself as
    ``injected`` and report at least ten tokens in total.
    """
    from bench.total_session_cost import run_total_session_cost

    def _fixed(text: str) -> int:
        # One token per character, never fewer than one token overall.
        return len(text) or 1

    result = run_total_session_cost(
        wake_depth="minimal",
        count_tokens_fn=_fixed,
    )

    assert result["mode"] == "injected"
    # The counter yields at least 1 per call, so 10 turns give >= 10.
    assert result["total_tokens"] >= 10
|
||||
Loading…
Add table
Add a link
Reference in a new issue