mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
add dynamic log config
This commit is contained in:
parent
f019f05738
commit
f04c0b7cdd
9 changed files with 634 additions and 5 deletions
27
demos/llm_routing/model_routing_service/config_metrics.yaml
Normal file
27
demos/llm_routing/model_routing_service/config_metrics.yaml
Normal file
|
|
@@ -0,0 +1,27 @@
|
|||
|
||||
# Demo config for the model routing service: one model listener, one model
# provider, and two model metrics sources (cost and latency).
# NOTE(review): nesting was reconstructed from a flattened diff view in which
# all leading indentation was lost - confirm against the config schema.
version: v0.4.0

# Both override keys point at the same orchestrator model.
overrides:
  llm_routing_model: plano/Plano-Orchestrator
  agent_orchestration_model: plano/Plano-Orchestrator

listeners:
  - type: model
    name: model_listener
    port: 12000

model_providers:
  - model: plano/Plano-Orchestrator
    base_url: http://plano-orchestrator:10001
    # presumably forwards the caller's credentials upstream - TODO confirm
    passthrough_auth: true

model_metrics_sources:
  # refresh_interval values are presumably seconds - TODO confirm units.
  - type: cost
    provider: digitalocean
    refresh_interval: 3600

  - type: latency
    provider: prometheus
    url: http://metrics-kube-prometheus-st-prometheus:9090
    # 0.95 quantile of the ttft duration histogram, grouped by model_name,
    # computed over a 5m rate window.
    query: histogram_quantile(0.95, sum by (model_name, le) (rate(inference_proxy_inference_client_ttft_duration_bucket[5m])))
    refresh_interval: 300
|
||||
Loading…
Add table
Add a link
Reference in a new issue