mirror of
https://github.com/katanemo/plano.git
synced 2026-05-03 12:52:56 +02:00
add Redis session affinity demos (Docker Compose and Kubernetes)
This commit is contained in:
parent
50670f843d
commit
90810078da
20 changed files with 2080 additions and 0 deletions
36
demos/llm_routing/session_affinity_redis/config.yaml
Normal file
36
demos/llm_routing/session_affinity_redis/config.yaml
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
# Plano demo configuration: model routing with a Redis-backed session cache.
# Nesting below was reconstructed from a flattened diff view — verify against
# the upstream file before use.
version: v0.4.0

# A single listener of type `model`, exposed on port 12000.
listeners:
  - type: model
    name: model_listener
    port: 12000

# Both providers read their key from the OPENAI_API_KEY environment reference.
model_providers:
  # Flagged as the default provider.
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-5.2
    access_key: $OPENAI_API_KEY

# Named routing preferences; each pairs a free-text description with the
# model(s) listed for it.
# NOTE(review): list order presumably expresses priority/fallback — confirm
# against the Plano routing documentation.
routing_preferences:
  - name: fast_responses
    description: 'short factual questions, quick lookups, simple summarization, or greetings'
    models:
      - openai/gpt-4o-mini

  - name: deep_reasoning
    description: 'multi-step reasoning, complex analysis, code review, or detailed explanations'
    models:
      - openai/gpt-5.2
      - openai/gpt-4o-mini

routing:
  # Session time-to-live, in seconds (per the key name).
  session_ttl_seconds: 300
  # Session cache backed by Redis.
  # NOTE(review): `localhost` only reaches Redis when it shares a network
  # namespace with the gateway — confirm for containerized deployments.
  session_cache:
    type: redis
    url: redis://localhost:6379

tracing:
  # NOTE(review): presumably a percentage (100 = sample everything) — confirm.
  random_sampling: 100
  trace_arch_internal: true
|
||||
Loading…
Add table
Add a link
Reference in a new issue