mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Some checks failed
CI / pre-commit (push) Has been cancelled
CI / plano-tools-tests (push) Has been cancelled
CI / native-smoke-test (push) Has been cancelled
CI / docker-build (push) Has been cancelled
CI / validate-config (push) Has been cancelled
Publish docker image (latest) / build-arm64 (push) Has been cancelled
Publish docker image (latest) / build-amd64 (push) Has been cancelled
Build and Deploy Documentation / build (push) Has been cancelled
CI / security-scan (push) Has been cancelled
CI / test-prompt-gateway (push) Has been cancelled
CI / test-model-alias-routing (push) Has been cancelled
CI / test-responses-api-with-state (push) Has been cancelled
CI / e2e-plano-tests (3.10) (push) Has been cancelled
CI / e2e-plano-tests (3.11) (push) Has been cancelled
CI / e2e-plano-tests (3.12) (push) Has been cancelled
CI / e2e-plano-tests (3.13) (push) Has been cancelled
CI / e2e-plano-tests (3.14) (push) Has been cancelled
CI / e2e-demo-preference (push) Has been cancelled
CI / e2e-demo-currency (push) Has been cancelled
Publish docker image (latest) / create-manifest (push) Has been cancelled
106 lines
2.9 KiB
Python
106 lines
2.9 KiB
Python
from datetime import datetime, timedelta, timezone
|
|
|
|
from planoai.obs.collector import LLMCall
|
|
from planoai.obs.render import aggregates, model_rollups, route_hits
|
|
|
|
|
|
def _call(
    model: str,
    ts: datetime,
    prompt=0,
    completion=0,
    cost=None,
    route=None,
    session=None,
    cache_read=0,
    cache_write=0,
):
    """Build an ``LLMCall`` fixture for the render tests.

    The short keyword names accepted here are mapped onto the longer
    ``LLMCall`` constructor fields. Every fixture shares the request id
    ``"r"``; arguments that are not supplied keep the defaults from the
    signature (zero token counts, ``None`` for cost, route and session).
    """
    # Token counters are grouped so the short-name -> field-name mapping is
    # visible in one place.
    token_fields = {
        "prompt_tokens": prompt,
        "completion_tokens": completion,
        "cached_input_tokens": cache_read,
        "cache_creation_tokens": cache_write,
    }
    return LLMCall(
        request_id="r",
        timestamp=ts,
        model=model,
        cost_usd=cost,
        route_name=route,
        session_id=session,
        **token_fields,
    )
|
|
|
|
|
|
def test_aggregates_sum_and_session_counts():
    """Check the totals and session bookkeeping reported by ``aggregates``.

    Three calls are built across two models and two sessions ("s1" twice,
    then "s2"). The "s2" call is both the newest by timestamp and the last
    element of the list, so the ``current_session`` expectation holds
    whichever of those two orderings ``aggregates`` relies on.
    """
    # Local import: only needed for the tolerant float comparison below.
    import math

    now = datetime.now(tz=timezone.utc).astimezone()
    calls = [
        _call(
            "m1",
            now - timedelta(seconds=50),
            prompt=10,
            completion=5,
            cost=0.001,
            session="s1",
        ),
        _call(
            "m2",
            now - timedelta(seconds=40),
            prompt=20,
            completion=10,
            cost=0.002,
            session="s1",
        ),
        _call(
            "m1",
            now - timedelta(seconds=30),
            prompt=30,
            completion=15,
            cost=0.003,
            session="s2",
        ),
    ]
    stats = aggregates(calls)
    assert stats.count == 3
    # The total cost is a sum of binary floats; exact equality would depend on
    # the summation order inside ``aggregates``, so compare with a tolerance.
    assert math.isclose(stats.total_cost_usd, 0.006, rel_tol=1e-9, abs_tol=1e-12)
    assert stats.total_input_tokens == 60
    assert stats.total_output_tokens == 30
    assert stats.distinct_sessions == 2
    assert stats.current_session == "s2"
|
|
|
|
|
|
def test_rollups_split_by_model_and_cache():
    """Check that ``model_rollups`` yields per-model request/token/cache totals.

    Two "m1" calls and one "m2" call share a timestamp; the "m1" calls carry
    cache read/write counts whose sums are asserted below.
    """
    ts = datetime.now(tz=timezone.utc).astimezone()
    fixtures = [
        _call(
            "m1",
            ts,
            prompt=10,
            completion=5,
            cost=0.001,
            cache_write=3,
            cache_read=7,
        ),
        _call("m1", ts, prompt=20, completion=10, cost=0.002, cache_read=1),
        _call("m2", ts, prompt=30, completion=15, cost=0.004),
    ]

    # Index the rollups by model name so each can be checked directly.
    per_model = {}
    for rollup in model_rollups(fixtures):
        per_model[rollup.model] = rollup

    m1 = per_model["m1"]
    assert m1.requests == 2
    assert m1.input_tokens == 30
    assert m1.cache_write == 3
    assert m1.cache_read == 8

    m2 = per_model["m2"]
    assert m2.input_tokens == 30
|
|
|
|
|
|
def test_route_hits_only_for_routed_calls():
    """Check that ``route_hits`` tallies hits per route name.

    Three calls carry a route ("code" twice, "summarization" once) and one
    carries none; the expected total of 3 excludes the unrouted call.
    """
    ts = datetime.now(tz=timezone.utc).astimezone()
    routed = [_call("m", ts, route=name) for name in ("code", "code", "summarization")]
    unrouted = _call("m", ts)  # no route
    hits = route_hits([*routed, unrouted])

    # Only calls with route names are counted.
    total_hits = 0
    for entry in hits:
        total_hits += entry.hits
    assert total_hits == 3

    by_route = {}
    for entry in hits:
        by_route[entry.route] = entry
    assert by_route["code"].hits == 2
    assert by_route["summarization"].hits == 1
|
|
|
|
|
|
def test_route_hits_empty_when_no_routes():
    """Check that ``route_hits`` returns an empty list when no call has a route."""
    ts = datetime.now(tz=timezone.utc).astimezone()
    unrouted_calls = [_call("m", ts) for _ in range(2)]
    result = route_hits(unrouted_calls)
    assert result == []
|