plano/demos/llm_routing/model_routing_service/config.yaml

49 lines
1.2 KiB
YAML

version: v0.4.0
listeners:
- type: model
name: model_listener
port: 12000
model_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: complex_reasoning
description: complex reasoning tasks, multi-step analysis, or detailed explanations
models:
- openai/gpt-4o
- openai/gpt-4o-mini
selection_policy:
prefer: cheapest
- name: code_generation
description: generating new code, writing functions, or creating boilerplate
models:
- anthropic/claude-sonnet-4-20250514
- openai/gpt-4o
selection_policy:
prefer: fastest
model_metrics_sources:
- type: cost
provider: digitalocean
refresh_interval: 3600
model_aliases:
openai-gpt-4o: openai/gpt-4o
openai-gpt-4o-mini: openai/gpt-4o-mini
anthropic-claude-sonnet-4: anthropic/claude-sonnet-4-20250514
- type: latency
provider: prometheus
url: http://localhost:9090
query: model_latency_p95_seconds
refresh_interval: 60