"""Tests for converting spans into ``LLMCall`` records and for ``LLMCallStore``."""

import time
from datetime import datetime, timezone
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest

from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call


def _mk_attr(key: str, value):
    """Build a mocked key/value attribute carrying ``value`` under ``key``.

    The mocked value reports, via ``WhichOneof()``, which typed field holds
    the payload (``bool_value``, ``int_value``, ``double_value`` or
    ``string_value``) and exposes the payload on that field. Anything that is
    not a bool, int or float is stringified into ``string_value``.
    """
    v = MagicMock()
    # bool must be tested before int: ``bool`` is a subclass of ``int`` in
    # Python, so ``isinstance(True, int)`` is also true.
    if isinstance(value, bool):
        v.WhichOneof.return_value = "bool_value"
        v.bool_value = value
    elif isinstance(value, int):
        v.WhichOneof.return_value = "int_value"
        v.int_value = value
    elif isinstance(value, float):
        v.WhichOneof.return_value = "double_value"
        v.double_value = value
    else:
        v.WhichOneof.return_value = "string_value"
        v.string_value = str(value)
    kv = MagicMock()
    kv.key = key
    kv.value = v
    return kv


def _mk_span(
    attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
) -> MagicMock:
    """Build a mocked span with the given attributes.

    Args:
        attrs: Mapping of attribute key to Python value; each entry is
            converted with ``_mk_attr``.
        start_ns: Span start time in nanoseconds since the epoch. ``None``
            (the default) means "now". An explicit ``0`` is honoured rather
            than being replaced by the current time.
        span_id_hex: Value returned by ``span.span_id.hex()``.

    Returns:
        A ``MagicMock`` exposing ``attributes``, ``start_time_unix_nano`` and
        ``span_id.hex()``.
    """
    span = MagicMock()
    span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
    # Compare against None explicitly: ``start_ns or ...`` would treat a
    # legitimate timestamp of 0 as "not provided".
    span.start_time_unix_nano = (
        start_ns if start_ns is not None else time.time_ns()
    )
    span.span_id.hex.return_value = span_id_hex
    return span


def test_span_without_llm_model_is_ignored():
    """A span carrying no ``llm.model`` attribute converts to ``None``."""
    span = _mk_span({"http.method": "POST"})
    assert span_to_llm_call(span, "plano(llm)") is None


def test_span_with_full_llm_attrs_produces_call():
    """Every supported span attribute is mapped onto the matching call field."""
    span = _mk_span(
        {
            "llm.model": "openai-gpt-5.4",
            "model.requested": "router:software-engineering",
            "plano.session_id": "sess-abc",
            "plano.route.name": "software-engineering",
            "llm.is_streaming": False,
            "llm.duration_ms": 1234,
            "llm.time_to_first_token": 210,
            "llm.usage.prompt_tokens": 100,
            "llm.usage.completion_tokens": 50,
            "llm.usage.total_tokens": 150,
            "llm.usage.cached_input_tokens": 30,
            "llm.usage.cache_creation_tokens": 5,
            "llm.usage.reasoning_tokens": 200,
            "http.status_code": 200,
            "request_id": "req-42",
        }
    )
    call = span_to_llm_call(span, "plano(llm)")
    assert call is not None
    assert call.request_id == "req-42"
    assert call.model == "openai-gpt-5.4"
    assert call.request_model == "router:software-engineering"
    assert call.session_id == "sess-abc"
    assert call.route_name == "software-engineering"
    assert call.is_streaming is False
    assert call.duration_ms == 1234.0
    assert call.ttft_ms == 210.0
    assert call.prompt_tokens == 100
    assert call.completion_tokens == 50
    assert call.total_tokens == 150
    assert call.cached_input_tokens == 30
    assert call.cache_creation_tokens == 5
    assert call.reasoning_tokens == 200
    assert call.status_code == 200


def test_pricing_lookup_attaches_cost():
    """The value returned by ``pricing.cost_for_call`` lands in ``cost_usd``."""

    class StubPricing:
        def cost_for_call(self, call):
            # Flat stub rate: 0.02 per prompt token + 0.03 per completion
            # token (i.e. 2 and 3 cents), returned in the unit of ``cost_usd``.
            return 0.02 * (call.prompt_tokens or 0) + 0.03 * (
                call.completion_tokens or 0
            )

    span = _mk_span(
        {
            "llm.model": "do/openai-gpt-5.4",
            "llm.usage.prompt_tokens": 10,
            "llm.usage.completion_tokens": 2,
        }
    )
    call = span_to_llm_call(span, "plano(llm)", pricing=StubPricing())
    assert call is not None
    # 0.02 * 10 + 0.03 * 2 = 0.26
    assert call.cost_usd == pytest.approx(0.26)


def test_tpt_and_tokens_per_sec_derived():
    """``tpt_ms`` and ``tokens_per_sec`` are derived from timing and tokens."""
    call = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=80,
    )
    # (1000 - 200) / 80 = 10ms per token => 100 tokens/sec
    assert call.tpt_ms == 10.0
    assert call.tokens_per_sec == 100.0


def test_tpt_returns_none_when_no_completion_tokens():
    """With zero completion tokens both derived rates are ``None``."""
    call = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=0,
    )
    assert call.tpt_ms is None
    assert call.tokens_per_sec is None


def test_store_evicts_fifo_at_capacity():
    """Adding past capacity drops the oldest calls; the newest three remain."""
    store = LLMCallStore(capacity=3)
    now = datetime.now(tz=timezone.utc)
    for i in range(5):
        store.add(
            LLMCall(
                request_id=f"r{i}",
                timestamp=now,
                model="m",
            )
        )
    snap = store.snapshot()
    assert len(snap) == 3
    assert [c.request_id for c in snap] == ["r2", "r3", "r4"]