# plano/cli/test/test_obs_collector.py
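"""Unit tests for planoai.obs.collector: span_to_llm_call conversion of
OTLP spans into LLMCall records, derived latency/throughput metrics, and
FIFO eviction in LLMCallStore."""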

import time
from datetime import datetime, timezone
from types import SimpleNamespace
from unittest.mock import MagicMock
import pytest
from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call


def _mk_attr(key: str, value):
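    """Build a mocked OTLP ``KeyValue`` whose value type matches the Python
    type of ``value``; ``bool`` is checked before ``int`` because ``bool``
    subclasses ``int``, and anything else falls back to a string value."""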
    v = MagicMock()
    if isinstance(value, bool):
        v.WhichOneof.return_value = "bool_value"
        v.bool_value = value
    elif isinstance(value, int):
        v.WhichOneof.return_value = "int_value"
        v.int_value = value
    elif isinstance(value, float):
        v.WhichOneof.return_value = "double_value"
        v.double_value = value
    else:
        v.WhichOneof.return_value = "string_value"
        v.string_value = str(value)
    kv = MagicMock()
    kv.key = key
    kv.value = v
    return kv


def _mk_span(
    attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
) -> MagicMock:
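    """Build a mocked OTLP span carrying the given attributes, a start time
    in nanoseconds (defaulting to now), and the given span id hex digest."""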
    span = MagicMock()
    span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
    span.start_time_unix_nano = start_ns or int(time.time() * 1_000_000_000)
    span.span_id.hex.return_value = span_id_hex
    return span


def test_span_without_llm_model_is_ignored():
    span = _mk_span({"http.method": "POST"})
    assert span_to_llm_call(span, "plano(llm)") is None


def test_span_with_full_llm_attrs_produces_call():
    span = _mk_span(
        {
            "llm.model": "openai-gpt-5.4",
            "model.requested": "router:software-engineering",
            "plano.session_id": "sess-abc",
            "plano.route.name": "software-engineering",
            "llm.is_streaming": False,
            "llm.duration_ms": 1234,
            "llm.time_to_first_token": 210,
            "llm.usage.prompt_tokens": 100,
            "llm.usage.completion_tokens": 50,
            "llm.usage.total_tokens": 150,
            "llm.usage.cached_input_tokens": 30,
            "llm.usage.cache_creation_tokens": 5,
            "llm.usage.reasoning_tokens": 200,
            "http.status_code": 200,
            "request_id": "req-42",
        }
    )
    call = span_to_llm_call(span, "plano(llm)")
    assert call is not None
    assert call.request_id == "req-42"
    assert call.model == "openai-gpt-5.4"
    assert call.request_model == "router:software-engineering"
    assert call.session_id == "sess-abc"
    assert call.route_name == "software-engineering"
    assert call.is_streaming is False
    assert call.duration_ms == 1234.0
    assert call.ttft_ms == 210.0
    assert call.prompt_tokens == 100
    assert call.completion_tokens == 50
    assert call.total_tokens == 150
    assert call.cached_input_tokens == 30
    assert call.cache_creation_tokens == 5
    assert call.reasoning_tokens == 200
    assert call.status_code == 200


def test_pricing_lookup_attaches_cost():
    class StubPricing:
        def cost_for_call(self, call):
            # 2 cents per prompt token + 3 cents per completion token, in USD
            return 0.02 * (call.prompt_tokens or 0) + 0.03 * (
                call.completion_tokens or 0
            )

    span = _mk_span(
        {
            "llm.model": "do/openai-gpt-5.4",
            "llm.usage.prompt_tokens": 10,
            "llm.usage.completion_tokens": 2,
        }
    )
    call = span_to_llm_call(span, "plano(llm)", pricing=StubPricing())
    assert call is not None
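    # 0.02 * 10 + 0.03 * 2 = 0.26 USD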
    assert call.cost_usd == pytest.approx(0.26)


def test_tpt_and_tokens_per_sec_derived():
    call = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=80,
    )
    # (1000 - 200) / 80 = 10ms per token => 100 tokens/sec
    assert call.tpt_ms == 10.0
    assert call.tokens_per_sec == 100.0


def test_tpt_returns_none_when_no_completion_tokens():
    call = LLMCall(
        request_id="x",
        timestamp=datetime.now(tz=timezone.utc),
        model="m",
        duration_ms=1000,
        ttft_ms=200,
        completion_tokens=0,
    )
    assert call.tpt_ms is None
    assert call.tokens_per_sec is None


def test_store_evicts_fifo_at_capacity():
    store = LLMCallStore(capacity=3)
    now = datetime.now(tz=timezone.utc)
    for i in range(5):
        store.add(
            LLMCall(
                request_id=f"r{i}",
                timestamp=now,
                model="m",
            )
        )
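    # Five adds into capacity 3: the two oldest calls (r0, r1) are evicted.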
    snap = store.snapshot()
    assert len(snap) == 3
    assert [c.request_id for c in snap] == ["r2", "r3", "r4"]
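

# A minimal illustrative sketch, assuming snapshot() preserves insertion
# order below capacity (consistent with the FIFO eviction test above).
def test_store_snapshot_preserves_order_below_capacity():
    store = LLMCallStore(capacity=3)
    now = datetime.now(tz=timezone.utc)
    store.add(LLMCall(request_id="r0", timestamp=now, model="m"))
    store.add(LLMCall(request_id="r1", timestamp=now, model="m"))
    snap = store.snapshot()
    # Nothing evicted yet: both calls present, oldest first.
    assert [c.request_id for c in snap] == ["r0", "r1"]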