mirror of
https://github.com/katanemo/plano.git
synced 2026-06-05 14:45:15 +02:00
add k8s deployment manifests and docs for self-hosted Arch-Router
This commit is contained in:
parent
f1b8c03e2f
commit
5b58bb60c3
7 changed files with 381 additions and 342 deletions
33
demos/llm_routing/model_routing_service/config_k8s.yaml
Normal file
33
demos/llm_routing/model_routing_service/config_k8s.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# Plano configuration for the model routing service demo (Kubernetes variant,
# demos/llm_routing/model_routing_service/config_k8s.yaml).
version: v0.3.0

# Select plano/Arch-Router as the model used for LLM routing decisions.
overrides:
  llm_routing_model: plano/Arch-Router

# A single listener of type `model`, exposed on port 12000.
listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:

  # The routing model itself, reached over HTTP.
  # NOTE(review): `arch-router` is presumably the in-cluster Service name of
  # the self-hosted Arch-Router deployment; confirm against the k8s manifests.
  - model: plano/Arch-Router
    base_url: http://arch-router:10000

  # Provider flagged as the default.
  # NOTE(review): `$OPENAI_API_KEY` / `$ANTHROPIC_API_KEY` are presumably
  # substituted from the environment at load time; confirm.
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  # Provider carrying the `complex_reasoning` routing preference.
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: complex_reasoning
        description: complex reasoning tasks, multi-step analysis, or detailed explanations

  # Provider carrying the `code_generation` routing preference.
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
    routing_preferences:
      - name: code_generation
        description: generating new code, writing functions, or creating boilerplate

# NOTE(review): `random_sampling` looks like a percentage (100 = trace every
# request); confirm against the Plano tracing documentation.
tracing:
  random_sampling: 100
|
||||
Loading…
Add table
Add a link
Reference in a new issue