diff --git a/cli/planoai/obs/collector.py b/cli/planoai/obs/collector.py index 9308c4b6..92875369 100644 --- a/cli/planoai/obs/collector.py +++ b/cli/planoai/obs/collector.py @@ -15,7 +15,6 @@ from opentelemetry.proto.collector.trace.v1 import ( trace_service_pb2_grpc, ) - DEFAULT_GRPC_PORT = 4317 DEFAULT_CAPACITY = 1000 @@ -198,9 +197,9 @@ def span_to_llm_call( route_name=( str(attrs[_PLANO_ROUTE_NAME]) if _PLANO_ROUTE_NAME in attrs else None ), - is_streaming=bool(attrs[_LLM_IS_STREAMING]) - if _LLM_IS_STREAMING in attrs - else None, + is_streaming=( + bool(attrs[_LLM_IS_STREAMING]) if _LLM_IS_STREAMING in attrs else None + ), status_code=_maybe_int(attrs.get(_HTTP_STATUS)), prompt_tokens=_maybe_int(attrs.get(_LLM_PROMPT_TOKENS)), completion_tokens=_maybe_int(attrs.get(_LLM_COMPLETION_TOKENS)), diff --git a/cli/planoai/obs/pricing.py b/cli/planoai/obs/pricing.py index 5c36c751..406b2cad 100644 --- a/cli/planoai/obs/pricing.py +++ b/cli/planoai/obs/pricing.py @@ -13,7 +13,6 @@ from typing import Any import requests - DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog" FETCH_TIMEOUT_SECS = 5.0 diff --git a/cli/planoai/obs/render.py b/cli/planoai/obs/render.py index 47a3742e..602b8aed 100644 --- a/cli/planoai/obs/render.py +++ b/cli/planoai/obs/render.py @@ -271,7 +271,11 @@ def _recent_table(calls: list[LLMCall], limit: int = 15) -> Table: recent = list(reversed(calls))[:limit] for c in recent: - status_cell = "ok" if c.status_code and 200 <= c.status_code < 400 else str(c.status_code or "—") + status_cell = ( + "ok" + if c.status_code and 200 <= c.status_code < 400 + else str(c.status_code or "—") + ) row = [ c.timestamp.strftime("%H:%M:%S"), c.model, diff --git a/cli/test/test_obs_collector.py b/cli/test/test_obs_collector.py index 6a503337..a16506d9 100644 --- a/cli/test/test_obs_collector.py +++ b/cli/test/test_obs_collector.py @@ -28,7 +28,9 @@ def _mk_attr(key: str, value): return kv -def _mk_span(attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab") -> MagicMock: +def _mk_span( + attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab" +) -> MagicMock: span = MagicMock() span.attributes = [_mk_attr(k, v) for k, v in attrs.items()] span.start_time_unix_nano = start_ns or int(time.time() * 1_000_000_000) @@ -84,7 +86,9 @@ def test_pricing_lookup_attaches_cost(): class StubPricing: def cost_for_call(self, call): # Simple: 2 * prompt + 3 * completion, in cents - return 0.02 * (call.prompt_tokens or 0) + 0.03 * (call.completion_tokens or 0) + return 0.02 * (call.prompt_tokens or 0) + 0.03 * ( + call.completion_tokens or 0 + ) span = _mk_span( { diff --git a/cli/test/test_obs_render.py b/cli/test/test_obs_render.py index b6438e63..11f4a1fc 100644 --- a/cli/test/test_obs_render.py +++ b/cli/test/test_obs_render.py @@ -4,7 +4,17 @@ from planoai.obs.collector import LLMCall from planoai.obs.render import aggregates, model_rollups, route_hits -def _call(model: str, ts: datetime, prompt=0, completion=0, cost=None, route=None, session=None, cache_read=0, cache_write=0): +def _call( + model: str, + ts: datetime, + prompt=0, + completion=0, + cost=None, + route=None, + session=None, + cache_read=0, + cache_write=0, +): return LLMCall( request_id="r", timestamp=ts, @@ -22,9 +32,30 @@ def _call(model: str, ts: datetime, prompt=0, completion=0, cost=None, route=Non def test_aggregates_sum_and_session_counts(): now = datetime.now(tz=timezone.utc).astimezone() calls = [ - _call("m1", now - timedelta(seconds=50), prompt=10, completion=5, cost=0.001, session="s1"), - _call("m2", now - timedelta(seconds=40), prompt=20, completion=10, cost=0.002, session="s1"), - _call("m1", now - timedelta(seconds=30), prompt=30, completion=15, cost=0.003, session="s2"), + _call( + "m1", + now - timedelta(seconds=50), + prompt=10, + completion=5, + cost=0.001, + session="s1", + ), + _call( + "m2", + now - timedelta(seconds=40), + prompt=20, + completion=10, + cost=0.002, + session="s1", + ), + _call( + "m1", + now - timedelta(seconds=30), + prompt=30, + completion=15, + cost=0.003, + session="s2", + ), ] stats = aggregates(calls) assert stats.count == 3 @@ -38,7 +69,9 @@ def test_aggregates_sum_and_session_counts(): def test_rollups_split_by_model_and_cache(): now = datetime.now(tz=timezone.utc).astimezone() calls = [ - _call("m1", now, prompt=10, completion=5, cost=0.001, cache_write=3, cache_read=7), + _call( + "m1", now, prompt=10, completion=5, cost=0.001, cache_write=3, cache_read=7 + ), _call("m1", now, prompt=20, completion=10, cost=0.002, cache_read=1), _call("m2", now, prompt=30, completion=15, cost=0.004), ]