# plano/demos/llm_routing/model_routing_service/config.yaml
#
# Listing metadata from the original file view: 54 lines, 1.4 KiB, YAML

# Version tag for this configuration file.
version: v0.4.0

# Listeners: one entry of type `model`, named model_listener, on port 12000.
listeners:
  - type: model
    name: model_listener
    port: 12000
# Model providers: one entry per upstream model. Access keys are written as
# $NAME placeholders rather than literal secrets (presumably substituted from
# the environment at load time -- confirm against the consumer).
model_providers:
  # The only entry flagged `default: true`.
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
# Routing preferences: each entry has a name, a free-text description of the
# requests it covers, a list of candidate models, and a selection policy.
# NOTE(review): `prefer: cheapest` / `prefer: fastest` presumably rank the
# candidates using data from model_metrics_sources -- confirm with the
# consumer's documentation.
routing_preferences:
  - name: complex_reasoning
    description: complex reasoning tasks, multi-step analysis, or detailed explanations
    models:
      - openai/gpt-4o
      - openai/gpt-4o-mini
    selection_policy:
      prefer: cheapest
  - name: code_generation
    description: generating new code, writing functions, or creating boilerplate
    models:
      - anthropic/claude-sonnet-4-20250514
      - openai/gpt-4o
    selection_policy:
      prefer: fastest
# Metrics sources: a pricing source and a Prometheus latency source.
# NOTE(review): refresh_interval values are presumably seconds -- confirm.
model_metrics_sources:
  - type: digitalocean_pricing
    refresh_interval: 3600
    # NOTE(review): nesting model_aliases under this source is inferred from
    # its position between the two list entries -- confirm against the schema.
    # Each alias maps a dash-separated key to a provider/model name declared
    # under model_providers.
    model_aliases:
      openai-gpt-4o: openai/gpt-4o
      openai-gpt-4o-mini: openai/gpt-4o-mini
      anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
  # Use cost_metrics instead of digitalocean_pricing to supply your own pricing data.
  # The demo metrics_server.py exposes /costs with OpenAI and Anthropic pricing.
  # - type: cost_metrics
  #   url: http://localhost:8080/costs
  #   refresh_interval: 300
  - type: prometheus_metrics
    url: http://localhost:9090
    query: model_latency_p95_seconds
    refresh_interval: 60