mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
add Redis session affinity demos (Docker Compose and Kubernetes)
This commit is contained in:
parent
50670f843d
commit
90810078da
20 changed files with 2080 additions and 0 deletions
56
demos/llm_routing/session_affinity_redis_k8s/k8s/jaeger.yaml
Normal file
56
demos/llm_routing/session_affinity_redis_k8s/k8s/jaeger.yaml
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: jaeger
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: jaeger
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jaeger
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: jaeger
|
||||
spec:
|
||||
containers:
|
||||
- name: jaeger
|
||||
image: jaegertracing/jaeger:2.3.0
|
||||
ports:
|
||||
- containerPort: 16686 # UI
|
||||
- containerPort: 4317 # OTLP gRPC
|
||||
- containerPort: 4318 # OTLP HTTP
|
||||
resources:
|
||||
requests:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: jaeger
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: jaeger
|
||||
spec:
|
||||
selector:
|
||||
app: jaeger
|
||||
ports:
|
||||
- name: ui
|
||||
port: 16686
|
||||
targetPort: 16686
|
||||
- name: otlp-grpc
|
||||
port: 4317
|
||||
targetPort: 4317
|
||||
- name: otlp-http
|
||||
port: 4318
|
||||
targetPort: 4318
|
||||
---
|
||||
# UI access from your laptop goes through a port-forward (the Service above is a plain ClusterIP Service, not a NodePort).
|
||||
# Run: kubectl port-forward svc/jaeger 16686:16686 -n plano-demo — then open http://localhost:16686
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: plano-demo
|
||||
labels:
|
||||
app.kubernetes.io/part-of: plano-session-affinity-demo
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
---
|
||||
# ConfigMap wrapping the Plano config file.
|
||||
# Regenerate after editing config_k8s.yaml:
|
||||
# kubectl create configmap plano-config \
|
||||
# --from-file=plano_config.yaml=../config_k8s.yaml \
|
||||
# -n plano-demo --dry-run=client -o yaml | kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: plano-config
|
||||
namespace: plano-demo
|
||||
data:
|
||||
plano_config.yaml: |
|
||||
version: v0.4.0
|
||||
|
||||
listeners:
|
||||
- type: model
|
||||
name: model_listener
|
||||
port: 12000
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-5.2
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: fast_responses
|
||||
description: short factual questions, quick lookups, simple summarization, or greetings
|
||||
models:
|
||||
- openai/gpt-4o-mini
|
||||
|
||||
- name: deep_reasoning
|
||||
description: multi-step reasoning, complex analysis, code review, or detailed explanations
|
||||
models:
|
||||
- openai/gpt-5.2
|
||||
- openai/gpt-4o-mini
|
||||
|
||||
routing:
|
||||
session_ttl_seconds: 600
|
||||
session_cache:
|
||||
type: redis
|
||||
url: redis://redis.plano-demo.svc.cluster.local:6379
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
trace_arch_internal: true
|
||||
opentracing_grpc_endpoint: http://jaeger.plano-demo.svc.cluster.local:4317
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# EXAMPLE — do NOT apply this file directly.
|
||||
# Create the real secret with:
|
||||
#
|
||||
# kubectl create secret generic plano-secrets \
|
||||
# --from-literal=OPENAI_API_KEY=sk-... \
|
||||
# -n plano-demo
|
||||
#
|
||||
# Or use the deploy.sh script, which prompts for keys and creates the secret.
|
||||
#
|
||||
# If you use a secrets manager (AWS Secrets Manager, GCP Secret Manager, Vault),
|
||||
# replace this with an ExternalSecret or a CSI driver volume mount instead.
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: plano-secrets
|
||||
namespace: plano-demo
|
||||
type: Opaque
|
||||
stringData:
|
||||
OPENAI_API_KEY: "sk-replace-me"
|
||||
130
demos/llm_routing/session_affinity_redis_k8s/k8s/plano.yaml
Normal file
130
demos/llm_routing/session_affinity_redis_k8s/k8s/plano.yaml
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
---
|
||||
# Plano Deployment — 2 replicas sharing one Redis instance.
|
||||
# All replicas are stateless; routing state lives entirely in Redis.
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: plano
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: plano
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: plano
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: plano
|
||||
spec:
|
||||
containers:
|
||||
- name: plano
|
||||
# Local dev: run-local.sh sets this to plano-redis:local and loads it
|
||||
# into minikube/kind so no registry is needed.
|
||||
# Production: replace with your registry image and use imagePullPolicy: Always.
|
||||
image: plano-redis:local
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 12000
|
||||
name: llm-gateway
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: plano-secrets
|
||||
env:
|
||||
- name: LOG_LEVEL
|
||||
value: "info"
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
volumeMounts:
|
||||
- name: plano-config
|
||||
mountPath: /app/plano_config.yaml
|
||||
subPath: plano_config.yaml
|
||||
readOnly: true
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 12000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 12000
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 30
|
||||
failureThreshold: 3
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "1000m"
|
||||
volumes:
|
||||
- name: plano-config
|
||||
configMap:
|
||||
name: plano-config
|
||||
---
|
||||
# LoadBalancer Service — exposes Plano externally.
|
||||
# On minikube, run: minikube tunnel
|
||||
# On kind, use the NodePort Service instead (see the commented-out manifest below).
|
||||
# On cloud providers (GKE, EKS, AKS), an external IP is assigned automatically.
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: plano
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: plano
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
selector:
|
||||
app: plano
|
||||
ports:
|
||||
- name: llm-gateway
|
||||
port: 12000
|
||||
targetPort: 12000
|
||||
---
|
||||
# Uncomment and use instead of the LoadBalancer Service above when running on kind,
|
||||
# or on minikube without `minikube tunnel`:
|
||||
#
|
||||
# apiVersion: v1
|
||||
# kind: Service
|
||||
# metadata:
|
||||
# name: plano
|
||||
# namespace: plano-demo
|
||||
# spec:
|
||||
# type: NodePort
|
||||
# selector:
|
||||
# app: plano
|
||||
# ports:
|
||||
# - name: llm-gateway
|
||||
# port: 12000
|
||||
# targetPort: 12000
|
||||
# nodePort: 32000
|
||||
---
|
||||
# HorizontalPodAutoscaler — scales between 2 and 5 replicas, targeting 70% average CPU utilization.
|
||||
# Demonstrates that new replicas join the existing session state seamlessly.
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: plano
|
||||
namespace: plano-demo
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: plano
|
||||
minReplicas: 2
|
||||
maxReplicas: 5
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
96
demos/llm_routing/session_affinity_redis_k8s/k8s/redis.yaml
Normal file
96
demos/llm_routing/session_affinity_redis_k8s/k8s/redis.yaml
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
---
|
||||
# Redis StatefulSet — single-shard, AOF persistence enabled (--appendonly yes).
|
||||
# For production, replace with Redis Cluster or a managed service (ElastiCache, MemoryStore, etc.).
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
serviceName: redis
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
name: redis
|
||||
command:
|
||||
- redis-server
|
||||
- --appendonly
|
||||
- "yes"
|
||||
- --maxmemory
|
||||
- "256mb"
|
||||
- --maxmemory-policy
|
||||
- allkeys-lru
|
||||
readinessProbe:
|
||||
exec:
|
||||
command: ["redis-cli", "ping"]
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
livenessProbe:
|
||||
exec:
|
||||
command: ["redis-cli", "ping"]
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "320Mi"
|
||||
cpu: "500m"
|
||||
volumeMounts:
|
||||
- name: redis-data
|
||||
mountPath: /data
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: redis-data
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
---
|
||||
# Headless Service. Stable DNS name: redis.plano-demo.svc.cluster.local:6379 (the address used by session_cache.url in the Plano config).
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
selector:
|
||||
app: redis
|
||||
ports:
|
||||
- name: redis
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
clusterIP: None # headless — StatefulSet pods get stable DNS
|
||||
---
|
||||
# Regular ClusterIP Service for application code (redis://redis-service:6379)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis-service
|
||||
namespace: plano-demo
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
selector:
|
||||
app: redis
|
||||
ports:
|
||||
- name: redis
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
Loading…
Add table
Add a link
Reference in a new issue