feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)

This commit is contained in:
Musa 2026-06-24 10:14:12 -07:00 committed by GitHub
parent 5cc4c4ee77
commit 558df0307c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 687 additions and 48 deletions

View file

@@ -86,6 +86,24 @@ routing_preferences:
selection_policy:
prefer: cheapest
# model_metrics_sources: external catalogs the router reads to reorder candidate
# models for selection_policy.prefer. A `cost` source ranks `prefer: cheapest`;
# a `latency` source ranks `prefer: fastest`. Both are optional.
model_metrics_sources:
  # Cost catalog. provider: models.dev | digitalocean (default url per provider).
  - type: cost
    provider: models.dev
    url: https://models.dev/api.json  # optional; omit to use the provider default
    refresh_interval: 3600  # optional, seconds
    model_aliases:  # optional: catalog key -> Plano model name
      openai/gpt-oss-120b: openai/gpt-4o
  # Latency catalog (Prometheus). Used for selection_policy.prefer: fastest.
  - type: latency
    provider: prometheus
    url: http://prometheus:9090
    # NOTE(review): rate() over `_sum` alone grows with request volume; a mean
    # latency is normally rate(..._sum) / rate(..._count) - confirm the intent.
    query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
    refresh_interval: 60  # presumably seconds, as for the cost source - confirm
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
# Agent listener for routing requests to multiple agents