Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
167
tests/test_bench_total_session_cost_adapters.py
Normal file
167
tests/test_bench_total_session_cost_adapters.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Plan 05-06 Task 3 — mempalace / claude-mem subprocess adapters in
|
||||
``bench/total_session_cost.py``.
|
||||
|
||||
These adapters let the reference column carry a live measurement
|
||||
from the mempalace CLI when it is installed locally, falling back to
|
||||
honest "adapter unavailable" disclosure when absent. They never block
|
||||
the bench: subprocess timeouts and non-zero exits return None and emit
|
||||
a ``bench_adapter_unavailable`` stderr event.
|
||||
|
||||
Covered contracts:
|
||||
|
||||
Test 1 _run_mempalace_adapter signature exists and accepts the 10-turn script
|
||||
Test 2 mempalace CLI absent -> None + stderr event, no exception
|
||||
Test 3 mempalace CLI present -> sums per-turn token counts via the 3-tier counter
|
||||
Test 4 --measure-mempalace flag wires the live adapter into refs["mempalace_measured"]
|
||||
Test 5 _run_claude_mem_adapter mirrors mempalace shape for forward compat
|
||||
Test 6 manual --ref-mempalace alongside --measure-mempalace keeps both values,
|
||||
but LIVE measurement is the comparator for the `passed` flag
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from bench.total_session_cost import (
|
||||
_SCRIPT,
|
||||
_run_claude_mem_adapter,
|
||||
_run_mempalace_adapter,
|
||||
main,
|
||||
run_total_session_cost,
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _fixed_counter(text: str) -> int:
|
||||
"""Deterministic counter: 1 token per word. Keeps assertions stable
|
||||
across tiktoken / anthropic / char4 drift."""
|
||||
return max(1, len(text.split()))
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 1
|
||||
|
||||
|
||||
def test_mempalace_adapter_signature():
|
||||
# Signature must accept the canonical 10-turn script and a counter.
|
||||
result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
|
||||
# Will be None on a machine without mempalace *responding cleanly*, but
|
||||
# the function must exist and not raise — callers depend on that contract.
|
||||
assert result is None or isinstance(result, int)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 2
|
||||
|
||||
|
||||
def test_mempalace_adapter_absent_cli_returns_none(capsys):
|
||||
with mock.patch("bench.total_session_cost.shutil.which", return_value=None):
|
||||
result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
|
||||
assert result is None
|
||||
err = capsys.readouterr().err
|
||||
assert "bench_adapter_unavailable" in err
|
||||
assert "mempalace" in err
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 3
|
||||
|
||||
|
||||
def test_mempalace_adapter_live_run_sums_stdout_tokens():
|
||||
"""With ``shutil.which`` finding the CLI and ``subprocess.run`` returning
|
||||
deterministic stdout, the adapter sums the token counts across all 10
|
||||
turns using the injected counter."""
|
||||
|
||||
def fake_which(name):
|
||||
return "/fake/bin/mempalace" if name == "mempalace" else None
|
||||
|
||||
def fake_run(*args, **kwargs):
|
||||
# stdout carries 3 words per turn -> 3 tokens per turn under _fixed_counter.
|
||||
return subprocess.CompletedProcess(
|
||||
args=args[0] if args else [],
|
||||
returncode=0,
|
||||
stdout="one two three",
|
||||
stderr="",
|
||||
)
|
||||
|
||||
with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
|
||||
mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
|
||||
result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
|
||||
assert result == 3 * len(_SCRIPT)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 4
|
||||
|
||||
|
||||
def test_measure_mempalace_flag_populates_refs(monkeypatch, capsys):
|
||||
"""End-to-end: running `main` with --measure-mempalace populates
|
||||
refs["mempalace_measured"] when the adapter returns a number."""
|
||||
|
||||
def fake_which(name):
|
||||
return "/fake/bin/mempalace" if name == "mempalace" else None
|
||||
|
||||
def fake_run(*args, **kwargs):
|
||||
return subprocess.CompletedProcess(
|
||||
args=args[0] if args else [],
|
||||
returncode=0,
|
||||
stdout="hello world",
|
||||
stderr="",
|
||||
)
|
||||
|
||||
with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
|
||||
mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
|
||||
rc = main(["--wake-depth", "minimal", "--measure-mempalace"])
|
||||
|
||||
captured = capsys.readouterr()
|
||||
result = json.loads(captured.out.strip())
|
||||
assert "mempalace_measured" in result["refs"]
|
||||
assert isinstance(result["refs"]["mempalace_measured"], int)
|
||||
assert result["refs"]["mempalace_measured"] > 0
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 5
|
||||
|
||||
|
||||
def test_claude_mem_adapter_mirrors_mempalace_shape(capsys):
|
||||
"""The claude-mem adapter has the same signature and absent-CLI fallback
|
||||
as the mempalace adapter, even though claude-mem is not installed
|
||||
locally. This keeps the forward-compat path live."""
|
||||
with mock.patch("bench.total_session_cost.shutil.which", return_value=None):
|
||||
result = _run_claude_mem_adapter(_SCRIPT, _fixed_counter)
|
||||
assert result is None
|
||||
err = capsys.readouterr().err
|
||||
assert "bench_adapter_unavailable" in err
|
||||
assert "claude-mem" in err
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 6
|
||||
|
||||
|
||||
def test_live_measurement_wins_over_manual_ref():
|
||||
"""When both ``--measure-mempalace`` and ``--ref-mempalace <int>`` are
|
||||
supplied, the live measurement lands in ``refs["mempalace_measured"]``
|
||||
and is the comparator for ``passed``; the manual int is recorded in
|
||||
``refs["mempalace_manual"]`` for audit trail."""
|
||||
|
||||
with mock.patch("bench.total_session_cost.shutil.which",
|
||||
side_effect=lambda n: "/fake/bin/mempalace" if n == "mempalace" else None), \
|
||||
mock.patch("bench.total_session_cost.subprocess.run",
|
||||
return_value=subprocess.CompletedProcess(
|
||||
args=[], returncode=0,
|
||||
stdout="token " * 5000, # 5000 tokens across 10 turns
|
||||
stderr="",
|
||||
)):
|
||||
result = run_total_session_cost(
|
||||
wake_depth="minimal",
|
||||
mempalace_ref=10, # manual ref — deliberately tiny to force fail IF used
|
||||
measure_mempalace=True,
|
||||
count_tokens_fn=_fixed_counter,
|
||||
)
|
||||
assert "mempalace_measured" in result["refs"]
|
||||
assert "mempalace_manual" in result["refs"]
|
||||
assert result["refs"]["mempalace_manual"] == 10
|
||||
# LIVE measurement is the gate; with 50000+ tokens live, IAI total
|
||||
# (<~3000) is well below, so passed is True.
|
||||
assert result["passed"] is True
|
||||
Loading…
Add table
Add a link
Reference in a new issue