plano/cli/test/test_obs_collector.py
2026-04-17 14:03:47 -07:00

145 lines
4.3 KiB
Python

import time
from datetime import datetime, timezone
from types import SimpleNamespace
from unittest.mock import MagicMock
import pytest
from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call
def _mk_attr(key: str, value):
v = MagicMock()
if isinstance(value, bool):
v.WhichOneof.return_value = "bool_value"
v.bool_value = value
elif isinstance(value, int):
v.WhichOneof.return_value = "int_value"
v.int_value = value
elif isinstance(value, float):
v.WhichOneof.return_value = "double_value"
v.double_value = value
else:
v.WhichOneof.return_value = "string_value"
v.string_value = str(value)
kv = MagicMock()
kv.key = key
kv.value = v
return kv
def _mk_span(
attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
) -> MagicMock:
span = MagicMock()
span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
span.start_time_unix_nano = start_ns or int(time.time() * 1_000_000_000)
span.span_id.hex.return_value = span_id_hex
return span
def test_span_without_llm_model_is_ignored():
    """A span whose only attribute is ``http.method`` yields no LLM call."""
    non_llm_span = _mk_span({"http.method": "POST"})

    result = span_to_llm_call(non_llm_span, "plano(llm)")

    assert result is None
def test_span_with_full_llm_attrs_produces_call():
    """A fully attributed span maps every attribute onto the resulting call."""
    span_attrs = {
        "llm.model": "openai-gpt-5.4",
        "model.requested": "router:software-engineering",
        "plano.session_id": "sess-abc",
        "plano.route.name": "software-engineering",
        "llm.is_streaming": False,
        "llm.duration_ms": 1234,
        "llm.time_to_first_token": 210,
        "llm.usage.prompt_tokens": 100,
        "llm.usage.completion_tokens": 50,
        "llm.usage.total_tokens": 150,
        "llm.usage.cached_input_tokens": 30,
        "llm.usage.cache_creation_tokens": 5,
        "llm.usage.reasoning_tokens": 200,
        "http.status_code": 200,
        "request_id": "req-42",
    }

    call = span_to_llm_call(_mk_span(span_attrs), "plano(llm)")
    assert call is not None

    # Field on the resulting call -> value expected from the span attributes.
    expected_by_field = {
        "request_id": "req-42",
        "model": "openai-gpt-5.4",
        "request_model": "router:software-engineering",
        "session_id": "sess-abc",
        "route_name": "software-engineering",
        "duration_ms": 1234.0,
        "ttft_ms": 210.0,
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "total_tokens": 150,
        "cached_input_tokens": 30,
        "cache_creation_tokens": 5,
        "reasoning_tokens": 200,
        "status_code": 200,
    }
    for field_name, wanted in expected_by_field.items():
        assert getattr(call, field_name) == wanted, field_name

    # Identity check kept separate: the flag must be the real ``False``,
    # not merely something equal to it.
    assert call.is_streaming is False
def test_pricing_lookup_attaches_cost():
    """The value returned by the pricing object ends up in ``cost_usd``."""

    class StubPricing:
        """Flat-rate stub: 0.02 per prompt token plus 0.03 per completion token."""

        def cost_for_call(self, call):
            prompt_count = call.prompt_tokens or 0
            completion_count = call.completion_tokens or 0
            return 0.02 * prompt_count + 0.03 * completion_count

    priced_span = _mk_span(
        {
            "llm.model": "do/openai-gpt-5.4",
            "llm.usage.prompt_tokens": 10,
            "llm.usage.completion_tokens": 2,
        }
    )

    call = span_to_llm_call(priced_span, "plano(llm)", pricing=StubPricing())

    assert call is not None
    # 0.02 * 10 + 0.03 * 2 = 0.26
    assert call.cost_usd == pytest.approx(0.26)
def test_tpt_and_tokens_per_sec_derived():
    """``tpt_ms`` and ``tokens_per_sec`` are derived from timing and token count."""
    fields = {
        "request_id": "x",
        "timestamp": datetime.now(tz=timezone.utc),
        "model": "m",
        "duration_ms": 1000,
        "ttft_ms": 200,
        "completion_tokens": 80,
    }
    call = LLMCall(**fields)

    # 800 ms of generation after the first token, spread over 80 tokens:
    # (1000 - 200) / 80 = 10 ms per token, i.e. 100 tokens per second.
    assert call.tpt_ms == 10.0
    assert call.tokens_per_sec == 100.0
def test_tpt_returns_none_when_no_completion_tokens():
    """With zero completion tokens both derived metrics are ``None``."""
    fields = {
        "request_id": "x",
        "timestamp": datetime.now(tz=timezone.utc),
        "model": "m",
        "duration_ms": 1000,
        "ttft_ms": 200,
        "completion_tokens": 0,
    }
    call = LLMCall(**fields)

    assert call.tpt_ms is None
    assert call.tokens_per_sec is None
def test_store_evicts_fifo_at_capacity():
    """Adding five calls to a capacity-3 store keeps only the last three, in order."""
    store = LLMCallStore(capacity=3)
    stamp = datetime.now(tz=timezone.utc)

    for index in range(5):
        entry = LLMCall(request_id=f"r{index}", timestamp=stamp, model="m")
        store.add(entry)

    retained = store.snapshot()
    retained_ids = [entry.request_id for entry in retained]

    assert len(retained) == 3
    # r0 and r1 were added first, so they are the ones dropped.
    assert retained_ids == ["r2", "r3", "r4"]