mirror of
https://github.com/katanemo/plano.git
synced 2026-05-02 04:12:56 +02:00
model routing: cost/latency ranking with ranked fallback list (#849)
This commit is contained in:
parent
3a531ce22a
commit
e5751d6b13
23 changed files with 1524 additions and 317 deletions
|
|
@ -1,4 +1,4 @@
|
|||
version: v0.3.0
|
||||
version: v0.4.0
|
||||
|
||||
listeners:
|
||||
- type: model
|
||||
|
|
@ -6,22 +6,48 @@ listeners:
|
|||
port: 12000
|
||||
|
||||
model_providers:
|
||||
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: complex reasoning tasks, multi-step analysis, or detailed explanations
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
routing_preferences:
|
||||
- name: complex_reasoning
|
||||
description: complex reasoning tasks, multi-step analysis, or detailed explanations
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
- openai/gpt-4o-mini
|
||||
selection_policy:
|
||||
prefer: cheapest
|
||||
|
||||
- name: code_generation
|
||||
description: generating new code, writing functions, or creating boilerplate
|
||||
models:
|
||||
- anthropic/claude-sonnet-4-20250514
|
||||
- openai/gpt-4o
|
||||
selection_policy:
|
||||
prefer: fastest
|
||||
|
||||
model_metrics_sources:
|
||||
- type: digitalocean_pricing
|
||||
refresh_interval: 3600
|
||||
model_aliases:
|
||||
openai-gpt-4o: openai/gpt-4o
|
||||
openai-gpt-4o-mini: openai/gpt-4o-mini
|
||||
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
|
||||
|
||||
# Use cost_metrics instead of digitalocean_pricing to supply your own pricing data.
|
||||
# The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing.
|
||||
# - type: cost_metrics
|
||||
# url: http://localhost:8080/costs
|
||||
# refresh_interval: 300
|
||||
|
||||
- type: prometheus_metrics
|
||||
url: http://localhost:9090
|
||||
query: model_latency_p95_seconds
|
||||
refresh_interval: 60
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue