mirror of
https://github.com/katanemo/plano.git
synced 2026-04-24 16:26:34 +02:00
* Update pre-commit black to latest release * Reformat Python files for new black version
52 lines
1.8 KiB
Python
52 lines
1.8 KiB
Python
"""
Demo metrics server.

Exposes two endpoints:
  GET /metrics  — Prometheus text format, P95 latency per model (scraped by Prometheus)
  GET /costs    — JSON cost data per model, compatible with cost_metrics source
"""
|
|
|
|
import json
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
|
|
# Canned Prometheus text-exposition payload served for /metrics.
# Pre-encoded once at import time so the handler can write it straight to the
# socket. The leading backslash suppresses the newline after the opening
# quotes so the payload starts directly with the "# HELP" line.
PROMETHEUS_METRICS = """\
# HELP model_latency_p95_seconds P95 request latency in seconds per model
# TYPE model_latency_p95_seconds gauge
model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85
model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20
model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40
""".encode()
|
|
|
|
# Static per-model cost table returned as JSON by GET /costs.
# Keys are the same model identifiers used as `model_name` labels in the
# Prometheus payload.
# NOTE(review): values are presumably prices per million input/output tokens
# (currency not stated here) — confirm against the cost_metrics consumer.
COST_DATA = {
    "anthropic/claude-sonnet-4-20250514": {
        "input_per_million": 3.0,
        "output_per_million": 15.0,
    },
    "openai/gpt-4o": {"input_per_million": 5.0, "output_per_million": 20.0},
    "openai/gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.6},
}
|
|
|
|
|
|
class MetricsHandler(BaseHTTPRequestHandler):
    """Serve the canned demo payloads over HTTP GET.

    ``/costs`` answers with ``COST_DATA`` serialized as JSON. Every other
    path (``/metrics`` included) answers with ``PROMETHEUS_METRICS`` in the
    Prometheus text exposition format. All responses use status 200.
    """

    def do_GET(self):
        """Dispatch a GET request on its path, ignoring any query string."""
        # Route on the path component only, so "/costs?x=1" still gets JSON
        # instead of silently falling through to the Prometheus branch.
        route = self.path.split("?", 1)[0]
        if route == "/costs":
            self._send_body("application/json", json.dumps(COST_DATA).encode())
        else:
            # /metrics and everything else → Prometheus format
            self._send_body(
                "text/plain; version=0.0.4; charset=utf-8",
                PROMETHEUS_METRICS,
            )

    def _send_body(self, content_type, body):
        """Write a complete 200 response whose payload is *body* (bytes).

        Sends an explicit Content-Length so clients know where the payload
        ends without having to wait for the connection to close.
        """
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt, *args):
        """Discard the per-request access log lines."""
        pass  # suppress access logs
|
|
|
|
|
|
def main():
    """Run the demo server on all interfaces, port 8080, until interrupted."""
    # The context manager closes the listening socket on the way out,
    # which the bare serve_forever() call never did.
    with HTTPServer(("", 8080), MetricsHandler) as server:
        print("metrics server listening on :8080 (/metrics, /costs)", flush=True)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the demo; exit without a traceback.
            pass


if __name__ == "__main__":
    main()
|