diff --git a/demos/llm_routing/model_routing_service/config.yaml b/demos/llm_routing/model_routing_service/config.yaml index f79a0e30..6f20134b 100644 --- a/demos/llm_routing/model_routing_service/config.yaml +++ b/demos/llm_routing/model_routing_service/config.yaml @@ -51,4 +51,3 @@ model_metrics_sources: url: http://localhost:9090 query: model_latency_p95_seconds refresh_interval: 60 - diff --git a/demos/llm_routing/model_routing_service/metrics_server.py b/demos/llm_routing/model_routing_service/metrics_server.py index d236fe73..65a5a0a3 100644 --- a/demos/llm_routing/model_routing_service/metrics_server.py +++ b/demos/llm_routing/model_routing_service/metrics_server.py @@ -17,9 +17,12 @@ model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40 """.encode() COST_DATA = { - "anthropic/claude-sonnet-4-20250514": {"input_per_million": 3.0, "output_per_million": 15.0}, - "openai/gpt-4o": {"input_per_million": 5.0, "output_per_million": 20.0}, - "openai/gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.6}, + "anthropic/claude-sonnet-4-20250514": { + "input_per_million": 3.0, + "output_per_million": 15.0, + }, + "openai/gpt-4o": {"input_per_million": 5.0, "output_per_million": 20.0}, + "openai/gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.6}, }