iai-mcp-opencode/tests/test_bench_total_session_cost_adapters.py

"""Plan 05-06 Task 3 — mempalace / claude-mem subprocess adapters in
``bench/total_session_cost.py``.

These adapters let the reference column carry a live measurement
from the mempalace CLI when it is installed locally, falling back to
honest "adapter unavailable" disclosure when absent. They never block
the bench: subprocess timeouts and non-zero exits return None and emit
a ``bench_adapter_unavailable`` stderr event.

Covered contracts:

    Test 1  _run_mempalace_adapter signature exists and accepts the 10-turn script
    Test 2  mempalace CLI absent -> None + stderr event, no exception
    Test 3  mempalace CLI present -> sums per-turn token counts via the 3-tier counter
    Test 4  --measure-mempalace flag wires the live adapter into refs["mempalace_measured"]
    Test 5  _run_claude_mem_adapter mirrors mempalace shape for forward compat
    Test 6  manual --ref-mempalace alongside --measure-mempalace keeps both values,
            but LIVE measurement is the comparator for the `passed` flag
"""
from __future__ import annotations

import json
import subprocess
from unittest import mock

import pytest

from bench.total_session_cost import (
    _SCRIPT,
    _run_claude_mem_adapter,
    _run_mempalace_adapter,
    main,
    run_total_session_cost,
)


# --------------------------------------------------------------------------- helpers


def _fixed_counter(text: str) -> int:
    """Deterministic counter: 1 token per word. Keeps assertions stable
    across tiktoken / anthropic / char4 drift."""
    return max(1, len(text.split()))


# --------------------------------------------------------------------------- Test 1


def test_mempalace_adapter_signature():
    # Signature must accept the canonical 10-turn script and a counter.
    result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    # Will be None on a machine without mempalace *responding cleanly*, but
    # the function must exist and not raise — callers depend on that contract.
    assert result is None or isinstance(result, int)


# --------------------------------------------------------------------------- Test 2


def test_mempalace_adapter_absent_cli_returns_none(capsys):
    with mock.patch("bench.total_session_cost.shutil.which", return_value=None):
        result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    assert result is None
    err = capsys.readouterr().err
    assert "bench_adapter_unavailable" in err
    assert "mempalace" in err


# --------------------------------------------------------------------------- Test 3


def test_mempalace_adapter_live_run_sums_stdout_tokens():
    """With ``shutil.which`` finding the CLI and ``subprocess.run`` returning
    deterministic stdout, the adapter sums the token counts across all 10
    turns using the injected counter."""

    def fake_which(name):
        return "/fake/bin/mempalace" if name == "mempalace" else None

    def fake_run(*args, **kwargs):
        # stdout carries 3 words per turn -> 3 tokens per turn under _fixed_counter.
        return subprocess.CompletedProcess(
            args=args[0] if args else [],
            returncode=0,
            stdout="one two three",
            stderr="",
        )

    with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
         mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
        result = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    assert result == 3 * len(_SCRIPT)


# --------------------------------------------------------------------------- Test 4


def test_measure_mempalace_flag_populates_refs(monkeypatch, capsys):
    """End-to-end: running `main` with --measure-mempalace populates
    refs["mempalace_measured"] when the adapter returns a number."""

    def fake_which(name):
        return "/fake/bin/mempalace" if name == "mempalace" else None

    def fake_run(*args, **kwargs):
        return subprocess.CompletedProcess(
            args=args[0] if args else [],
            returncode=0,
            stdout="hello world",
            stderr="",
        )

    with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
         mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
        rc = main(["--wake-depth", "minimal", "--measure-mempalace"])

    captured = capsys.readouterr()
    result = json.loads(captured.out.strip())
    assert "mempalace_measured" in result["refs"]
    assert isinstance(result["refs"]["mempalace_measured"], int)
    assert result["refs"]["mempalace_measured"] > 0


# --------------------------------------------------------------------------- Test 5


def test_claude_mem_adapter_mirrors_mempalace_shape(capsys):
    """The claude-mem adapter has the same signature and absent-CLI fallback
    as the mempalace adapter, even though claude-mem is not installed
    locally. This keeps the forward-compat path live."""
    with mock.patch("bench.total_session_cost.shutil.which", return_value=None):
        result = _run_claude_mem_adapter(_SCRIPT, _fixed_counter)
    assert result is None
    err = capsys.readouterr().err
    assert "bench_adapter_unavailable" in err
    assert "claude-mem" in err


# --------------------------------------------------------------------------- Test 6


def test_live_measurement_wins_over_manual_ref():
    """When both ``--measure-mempalace`` and ``--ref-mempalace <int>`` are
    supplied, the live measurement lands in ``refs["mempalace_measured"]``
    and is the comparator for ``passed``; the manual int is recorded in
    ``refs["mempalace_manual"]`` for audit trail."""

    with mock.patch("bench.total_session_cost.shutil.which",
                    side_effect=lambda n: "/fake/bin/mempalace" if n == "mempalace" else None), \
         mock.patch("bench.total_session_cost.subprocess.run",
                    return_value=subprocess.CompletedProcess(
                        args=[], returncode=0,
                        stdout="token " * 5000,  # 5000 tokens across 10 turns
                        stderr="",
                    )):
        result = run_total_session_cost(
            wake_depth="minimal",
            mempalace_ref=10,  # manual ref — deliberately tiny to force fail IF used
            measure_mempalace=True,
            count_tokens_fn=_fixed_counter,
        )
    assert "mempalace_measured" in result["refs"]
    assert "mempalace_manual" in result["refs"]
    assert result["refs"]["mempalace_manual"] == 10
    # LIVE measurement is the gate; with 50000+ tokens live, IAI total
    # (<~3000) is well below, so passed is True.
    assert result["passed"] is True