mirror of
https://github.com/katanemo/plano.git
synced 2026-05-01 03:46:35 +02:00
planoai obs: live LLM observability TUI (#891)
This commit is contained in:
parent
1f701258cb
commit
0f67b2c806
19 changed files with 1766 additions and 5 deletions
328
cli/planoai/obs/render.py
Normal file
328
cli/planoai/obs/render.py
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
"""Rich TUI renderer for the observability console."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from rich.box import SIMPLE
|
||||
from rich.columns import Columns
|
||||
from rich.console import Group
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from planoai.obs.collector import LLMCall
|
||||
|
||||
|
||||
@dataclass
|
||||
class AggregateStats:
|
||||
count: int
|
||||
total_cost_usd: float
|
||||
total_input_tokens: int
|
||||
total_output_tokens: int
|
||||
distinct_sessions: int
|
||||
current_session: str | None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelRollup:
|
||||
model: str
|
||||
requests: int
|
||||
input_tokens: int
|
||||
output_tokens: int
|
||||
cache_write: int
|
||||
cache_read: int
|
||||
cost_usd: float
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(tz=timezone.utc).astimezone()
|
||||
|
||||
|
||||
def aggregates(calls: list[LLMCall]) -> AggregateStats:
|
||||
total_cost = sum((c.cost_usd or 0.0) for c in calls)
|
||||
total_input = sum(int(c.prompt_tokens or 0) for c in calls)
|
||||
total_output = sum(int(c.completion_tokens or 0) for c in calls)
|
||||
session_ids = {c.session_id for c in calls if c.session_id}
|
||||
current = next(
|
||||
(c.session_id for c in reversed(calls) if c.session_id is not None), None
|
||||
)
|
||||
return AggregateStats(
|
||||
count=len(calls),
|
||||
total_cost_usd=total_cost,
|
||||
total_input_tokens=total_input,
|
||||
total_output_tokens=total_output,
|
||||
distinct_sessions=len(session_ids),
|
||||
current_session=current,
|
||||
)
|
||||
|
||||
|
||||
def model_rollups(calls: list[LLMCall]) -> list[ModelRollup]:
|
||||
buckets: dict[str, dict[str, float | int]] = {}
|
||||
for c in calls:
|
||||
key = c.model
|
||||
b = buckets.setdefault(
|
||||
key,
|
||||
{
|
||||
"requests": 0,
|
||||
"input": 0,
|
||||
"output": 0,
|
||||
"cache_write": 0,
|
||||
"cache_read": 0,
|
||||
"cost": 0.0,
|
||||
},
|
||||
)
|
||||
b["requests"] = int(b["requests"]) + 1
|
||||
b["input"] = int(b["input"]) + int(c.prompt_tokens or 0)
|
||||
b["output"] = int(b["output"]) + int(c.completion_tokens or 0)
|
||||
b["cache_write"] = int(b["cache_write"]) + int(c.cache_creation_tokens or 0)
|
||||
b["cache_read"] = int(b["cache_read"]) + int(c.cached_input_tokens or 0)
|
||||
b["cost"] = float(b["cost"]) + (c.cost_usd or 0.0)
|
||||
|
||||
rollups: list[ModelRollup] = []
|
||||
for model, b in buckets.items():
|
||||
rollups.append(
|
||||
ModelRollup(
|
||||
model=model,
|
||||
requests=int(b["requests"]),
|
||||
input_tokens=int(b["input"]),
|
||||
output_tokens=int(b["output"]),
|
||||
cache_write=int(b["cache_write"]),
|
||||
cache_read=int(b["cache_read"]),
|
||||
cost_usd=float(b["cost"]),
|
||||
)
|
||||
)
|
||||
rollups.sort(key=lambda r: r.cost_usd, reverse=True)
|
||||
return rollups
|
||||
|
||||
|
||||
def route_hits(calls: list[LLMCall]) -> list[tuple[str, int, float]]:
|
||||
counts: Counter[str] = Counter()
|
||||
for c in calls:
|
||||
if c.route_name:
|
||||
counts[c.route_name] += 1
|
||||
total = sum(counts.values())
|
||||
if total == 0:
|
||||
return []
|
||||
return [(r, n, (n / total) * 100.0) for r, n in counts.most_common()]
|
||||
|
||||
|
||||
def _fmt_cost(v: float | None) -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
if v == 0:
|
||||
return "$0"
|
||||
# Adaptive precision so tiny costs ($3.8e-5) remain readable.
|
||||
if abs(v) < 0.0001:
|
||||
return f"${v:.8f}".rstrip("0").rstrip(".")
|
||||
if abs(v) < 0.01:
|
||||
return f"${v:.6f}".rstrip("0").rstrip(".")
|
||||
return f"${v:.4f}"
|
||||
|
||||
|
||||
def _fmt_ms(v: float | None) -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
if v >= 1000:
|
||||
return f"{v / 1000:.1f}s"
|
||||
return f"{v:.0f}ms"
|
||||
|
||||
|
||||
def _fmt_int(v: int | None) -> str:
|
||||
if v is None or v == 0:
|
||||
return "—"
|
||||
return f"{v:,}"
|
||||
|
||||
|
||||
def _fmt_tokens(v: int | None) -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
return f"{v:,}"
|
||||
|
||||
|
||||
def _request_panel(last: LLMCall | None) -> Panel:
|
||||
if last is None:
|
||||
body = Text("no requests yet", style="dim")
|
||||
else:
|
||||
t = Table.grid(padding=(0, 1))
|
||||
t.add_column(style="bold cyan")
|
||||
t.add_column()
|
||||
t.add_row("Endpoint", "chat/completions")
|
||||
status = "—" if last.status_code is None else str(last.status_code)
|
||||
t.add_row("Status", status)
|
||||
t.add_row("Model", last.model)
|
||||
if last.request_model and last.request_model != last.model:
|
||||
t.add_row("Req model", last.request_model)
|
||||
if last.route_name:
|
||||
t.add_row("Route", last.route_name)
|
||||
body = t
|
||||
return Panel(body, title="[bold]Request[/]", border_style="cyan", box=SIMPLE)
|
||||
|
||||
|
||||
def _cost_panel(last: LLMCall | None) -> Panel:
|
||||
if last is None:
|
||||
body = Text("—", style="dim")
|
||||
else:
|
||||
t = Table.grid(padding=(0, 1))
|
||||
t.add_column(style="bold green")
|
||||
t.add_column()
|
||||
t.add_row("Request", _fmt_cost(last.cost_usd))
|
||||
t.add_row("Input", _fmt_tokens(last.prompt_tokens))
|
||||
t.add_row("Output", _fmt_tokens(last.completion_tokens))
|
||||
if last.cached_input_tokens:
|
||||
t.add_row("Cached", _fmt_tokens(last.cached_input_tokens))
|
||||
body = t
|
||||
return Panel(body, title="[bold]Cost[/]", border_style="green", box=SIMPLE)
|
||||
|
||||
|
||||
def _totals_panel(stats: AggregateStats) -> Panel:
|
||||
t = Table.grid(padding=(0, 1))
|
||||
t.add_column(style="bold magenta")
|
||||
t.add_column()
|
||||
t.add_column(style="bold magenta")
|
||||
t.add_column()
|
||||
t.add_row(
|
||||
"Total cost",
|
||||
_fmt_cost(stats.total_cost_usd),
|
||||
"Requests",
|
||||
str(stats.count),
|
||||
)
|
||||
t.add_row(
|
||||
"Input",
|
||||
_fmt_tokens(stats.total_input_tokens),
|
||||
"Output",
|
||||
_fmt_tokens(stats.total_output_tokens),
|
||||
)
|
||||
t.add_row(
|
||||
"Sessions",
|
||||
str(stats.distinct_sessions),
|
||||
"Current session",
|
||||
stats.current_session or "—",
|
||||
)
|
||||
return Panel(t, title="[bold]Totals[/]", border_style="magenta", box=SIMPLE)
|
||||
|
||||
|
||||
def _model_rollup_table(rollups: list[ModelRollup]) -> Table:
|
||||
table = Table(
|
||||
title="Totals by model",
|
||||
box=SIMPLE,
|
||||
header_style="bold",
|
||||
expand=True,
|
||||
)
|
||||
table.add_column("Model", style="cyan")
|
||||
table.add_column("Req", justify="right")
|
||||
table.add_column("Input", justify="right")
|
||||
table.add_column("Output", justify="right", style="green")
|
||||
table.add_column("Cache write", justify="right", style="yellow")
|
||||
table.add_column("Cache read", justify="right", style="yellow")
|
||||
table.add_column("Cost", justify="right", style="green")
|
||||
if not rollups:
|
||||
table.add_row("—", "—", "—", "—", "—", "—", "—")
|
||||
for r in rollups:
|
||||
table.add_row(
|
||||
r.model,
|
||||
str(r.requests),
|
||||
_fmt_tokens(r.input_tokens),
|
||||
_fmt_tokens(r.output_tokens),
|
||||
_fmt_int(r.cache_write),
|
||||
_fmt_int(r.cache_read),
|
||||
_fmt_cost(r.cost_usd),
|
||||
)
|
||||
return table
|
||||
|
||||
|
||||
def _route_hit_table(hits: list[tuple[str, int, float]]) -> Table:
|
||||
table = Table(
|
||||
title="Route hit %",
|
||||
box=SIMPLE,
|
||||
header_style="bold",
|
||||
expand=True,
|
||||
)
|
||||
table.add_column("Route", style="cyan")
|
||||
table.add_column("Hits", justify="right")
|
||||
table.add_column("%", justify="right")
|
||||
for route, n, pct in hits:
|
||||
table.add_row(route, str(n), f"{pct:.1f}")
|
||||
return table
|
||||
|
||||
|
||||
def _recent_table(calls: list[LLMCall], limit: int = 15) -> Table:
|
||||
show_route = any(c.route_name for c in calls)
|
||||
table = Table(
|
||||
title="Recent requests",
|
||||
box=SIMPLE,
|
||||
header_style="bold",
|
||||
expand=True,
|
||||
)
|
||||
table.add_column("time")
|
||||
table.add_column("model", style="cyan")
|
||||
if show_route:
|
||||
table.add_column("route", style="yellow")
|
||||
table.add_column("in", justify="right")
|
||||
table.add_column("cache", justify="right", style="yellow")
|
||||
table.add_column("out", justify="right", style="green")
|
||||
table.add_column("rsn", justify="right")
|
||||
table.add_column("cost", justify="right", style="green")
|
||||
table.add_column("TTFT", justify="right")
|
||||
table.add_column("lat", justify="right")
|
||||
table.add_column("st")
|
||||
|
||||
recent = list(reversed(calls))[:limit]
|
||||
for c in recent:
|
||||
status_cell = (
|
||||
"ok"
|
||||
if c.status_code and 200 <= c.status_code < 400
|
||||
else str(c.status_code or "—")
|
||||
)
|
||||
row = [
|
||||
c.timestamp.strftime("%H:%M:%S"),
|
||||
c.model,
|
||||
]
|
||||
if show_route:
|
||||
row.append(c.route_name or "—")
|
||||
row.extend(
|
||||
[
|
||||
_fmt_tokens(c.prompt_tokens),
|
||||
_fmt_int(c.cached_input_tokens),
|
||||
_fmt_tokens(c.completion_tokens),
|
||||
_fmt_int(c.reasoning_tokens),
|
||||
_fmt_cost(c.cost_usd),
|
||||
_fmt_ms(c.ttft_ms),
|
||||
_fmt_ms(c.duration_ms),
|
||||
status_cell,
|
||||
]
|
||||
)
|
||||
table.add_row(*row)
|
||||
if not recent:
|
||||
table.add_row(*(["no requests yet"] + ["—"] * (10 if show_route else 9)))
|
||||
return table
|
||||
|
||||
|
||||
def render(calls: list[LLMCall]) -> Group:
|
||||
last = calls[-1] if calls else None
|
||||
stats = aggregates(calls)
|
||||
rollups = model_rollups(calls)
|
||||
hits = route_hits(calls)
|
||||
|
||||
header = Columns(
|
||||
[_request_panel(last), _cost_panel(last), _totals_panel(stats)],
|
||||
expand=True,
|
||||
equal=True,
|
||||
)
|
||||
parts = [
|
||||
header,
|
||||
_model_rollup_table(rollups),
|
||||
]
|
||||
if hits:
|
||||
parts.append(_route_hit_table(hits))
|
||||
parts.append(_recent_table(calls))
|
||||
parts.append(
|
||||
Text(
|
||||
"q quit · c clear · waiting for spans on OTLP :4317 — brightstaff needs "
|
||||
"tracing.opentracing_grpc_endpoint=localhost:4317",
|
||||
style="dim",
|
||||
)
|
||||
)
|
||||
return Group(*parts)
|
||||
Loading…
Add table
Add a link
Reference in a new issue