# mirror of https://github.com/katanemo/plano.git
# synced 2026-04-25 00:36:34 +02:00
import time
|
|
from datetime import datetime, timezone
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call
|
|
|
|
|
|
def _mk_attr(key: str, value):
|
|
v = MagicMock()
|
|
if isinstance(value, bool):
|
|
v.WhichOneof.return_value = "bool_value"
|
|
v.bool_value = value
|
|
elif isinstance(value, int):
|
|
v.WhichOneof.return_value = "int_value"
|
|
v.int_value = value
|
|
elif isinstance(value, float):
|
|
v.WhichOneof.return_value = "double_value"
|
|
v.double_value = value
|
|
else:
|
|
v.WhichOneof.return_value = "string_value"
|
|
v.string_value = str(value)
|
|
kv = MagicMock()
|
|
kv.key = key
|
|
kv.value = v
|
|
return kv
|
|
|
|
|
|
def _mk_span(
    attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
) -> MagicMock:
    """Build a mock OTLP span.

    Args:
        attrs: span attributes, converted to KeyValue mocks via ``_mk_attr``.
        start_ns: span start time in unix nanoseconds; defaults to "now".
        span_id_hex: hex string returned by ``span.span_id.hex()``.
    """
    span = MagicMock()
    span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
    # Check against None rather than truthiness so an explicit start of 0
    # (the unix epoch) is honored instead of being replaced by the current time.
    if start_ns is None:
        start_ns = int(time.time() * 1_000_000_000)
    span.start_time_unix_nano = start_ns
    span.span_id.hex.return_value = span_id_hex
    return span
|
|
|
|
|
|
def test_span_without_llm_model_is_ignored():
    """A span that carries no "llm.model" attribute yields no LLMCall."""
    non_llm_span = _mk_span({"http.method": "POST"})
    assert span_to_llm_call(non_llm_span, "plano(llm)") is None
|
|
|
|
|
|
def test_span_with_full_llm_attrs_produces_call():
    """Every supported span attribute is carried over onto the LLMCall."""
    attributes = {
        "llm.model": "openai-gpt-5.4",
        "model.requested": "router:software-engineering",
        "plano.session_id": "sess-abc",
        "plano.route.name": "software-engineering",
        "llm.is_streaming": False,
        "llm.duration_ms": 1234,
        "llm.time_to_first_token": 210,
        "llm.usage.prompt_tokens": 100,
        "llm.usage.completion_tokens": 50,
        "llm.usage.total_tokens": 150,
        "llm.usage.cached_input_tokens": 30,
        "llm.usage.cache_creation_tokens": 5,
        "llm.usage.reasoning_tokens": 200,
        "http.status_code": 200,
        "request_id": "req-42",
    }
    parsed = span_to_llm_call(_mk_span(attributes), "plano(llm)")

    assert parsed is not None
    # Identity / routing fields.
    assert parsed.request_id == "req-42"
    assert parsed.model == "openai-gpt-5.4"
    assert parsed.request_model == "router:software-engineering"
    assert parsed.session_id == "sess-abc"
    assert parsed.route_name == "software-engineering"
    assert parsed.is_streaming is False
    # Latency fields are floats on the call object.
    assert parsed.duration_ms == 1234.0
    assert parsed.ttft_ms == 210.0
    # Token accounting.
    assert parsed.prompt_tokens == 100
    assert parsed.completion_tokens == 50
    assert parsed.total_tokens == 150
    assert parsed.cached_input_tokens == 30
    assert parsed.cache_creation_tokens == 5
    assert parsed.reasoning_tokens == 200
    # Transport result.
    assert parsed.status_code == 200
|
|
|
|
|
|
def test_pricing_lookup_attaches_cost():
    """span_to_llm_call asks the pricing object for a USD cost per call."""

    class FakePricing:
        def cost_for_call(self, llm_call):
            # Toy rate card: $0.02 per prompt token, $0.03 per completion token.
            prompt = llm_call.prompt_tokens or 0
            completion = llm_call.completion_tokens or 0
            return 0.02 * prompt + 0.03 * completion

    priced_span = _mk_span(
        {
            "llm.model": "do/openai-gpt-5.4",
            "llm.usage.prompt_tokens": 10,
            "llm.usage.completion_tokens": 2,
        }
    )
    parsed = span_to_llm_call(priced_span, "plano(llm)", pricing=FakePricing())

    assert parsed is not None
    # 0.02 * 10 + 0.03 * 2 = 0.26 USD.
    assert parsed.cost_usd == pytest.approx(0.26)
|
|
|
|
|
|
def test_tpt_and_tokens_per_sec_derived():
    """Per-token latency and throughput are derived from duration and TTFT."""
    sample = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=80,
    )
    # Decode phase is (1000 - 200) ms for 80 tokens:
    # 10 ms per token, i.e. 100 tokens/sec.
    assert sample.tpt_ms == 10.0
    assert sample.tokens_per_sec == 100.0
|
|
|
|
|
|
def test_tpt_returns_none_when_no_completion_tokens():
    """With zero completion tokens the per-token metrics are undefined."""
    sample = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=0,
    )
    # No tokens were decoded, so neither rate can be computed.
    assert sample.tpt_ms is None
    assert sample.tokens_per_sec is None
|
|
|
|
|
|
def test_store_evicts_fifo_at_capacity():
    """Once at capacity, the store drops the oldest calls first (FIFO)."""
    store = LLMCallStore(capacity=3)
    stamp = datetime.now(tz=timezone.utc)

    # Insert five calls into a store that only holds three.
    for idx in range(5):
        entry = LLMCall(
            request_id=f"r{idx}",
            timestamp=stamp,
            model="m",
        )
        store.add(entry)

    survivors = store.snapshot()
    assert len(survivors) == 3
    # r0 and r1 were evicted; insertion order is preserved for the rest.
    assert [c.request_id for c in survivors] == ["r2", "r3", "r4"]
|