mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)
This commit is contained in:
parent
5cc4c4ee77
commit
558df0307c
9 changed files with 687 additions and 48 deletions
|
|
@ -86,6 +86,24 @@ routing_preferences:
|
|||
selection_policy:
|
||||
prefer: cheapest
|
||||
|
||||
# model_metrics_sources: external catalogs the router reads to reorder candidate
|
||||
# models for selection_policy.prefer. A `cost` source ranks `prefer: cheapest`;
|
||||
# a `latency` source ranks `prefer: fastest`. Both are optional.
|
||||
model_metrics_sources:
  # Cost catalog. provider is either models.dev or digitalocean; each provider
  # has its own default url.
  - type: cost
    provider: models.dev
    url: https://models.dev/api.json  # optional; omit to use the provider default
    refresh_interval: 3600  # optional, seconds
    model_aliases:  # optional: catalog key -> Plano model name
      openai/gpt-oss-120b: openai/gpt-4o
  # Latency catalog (Prometheus). Used for selection_policy.prefer: fastest.
  - type: latency
    provider: prometheus
    url: http://prometheus:9090
    # Mean latency per model: rate of the latency sum divided by the rate of
    # the observation count. The *_sum rate on its own grows with request
    # volume, so it would rank busy models as slow.
    # NOTE(review): assumes the metric also exposes a *_count series (standard
    # for Prometheus histograms and summaries) - confirm against the exporter.
    query: >-
      avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m])
      / rate(plano_llm_latency_seconds_count[5m]))
    refresh_interval: 60
|
||||
|
||||
# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
|
||||
listeners:
|
||||
# Agent listener for routing requests to multiple agents
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue