add Redis session affinity demos (Docker Compose and Kubernetes)

This commit is contained in:
Spherrrical 2026-04-09 16:32:40 -07:00
parent 50670f843d
commit 90810078da
20 changed files with 2080 additions and 0 deletions

View file

@ -0,0 +1,56 @@
---
# Jaeger Deployment — one replica exposing the UI and the OTLP gRPC/HTTP
# receiver ports.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jaeger
  namespace: plano-demo
  labels:
    app: jaeger
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jaeger
  template:
    metadata:
      labels:
        app: jaeger
    spec:
      containers:
        - name: jaeger
          image: jaegertracing/jaeger:2.3.0
          ports:
            - containerPort: 16686  # UI
            - containerPort: 4317  # OTLP gRPC
            - containerPort: 4318  # OTLP HTTP
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
---
# Service for Jaeger. No `type` is set, so Kubernetes creates a default
# ClusterIP Service that forwards the UI and OTLP ports to the jaeger pod.
apiVersion: v1
kind: Service
metadata:
  name: jaeger
  namespace: plano-demo
  labels:
    app: jaeger
spec:
  selector:
    app: jaeger
  ports:
    - name: ui
      port: 16686
      targetPort: 16686
    - name: otlp-grpc
      port: 4317
      targetPort: 4317
    - name: otlp-http
      port: 4318
      targetPort: 4318
---
# The Service above is ClusterIP-only (no NodePort is defined).
# To reach the UI from your laptop, run:
#   kubectl port-forward svc/jaeger 16686:16686 -n plano-demo
# then open http://localhost:16686

View file

@ -0,0 +1,6 @@
---
# Namespace for the Plano session-affinity demo resources.
apiVersion: v1
kind: Namespace
metadata:
  name: plano-demo
  labels:
    app.kubernetes.io/part-of: plano-session-affinity-demo

View file

@ -0,0 +1,50 @@
---
# ConfigMap wrapping the Plano config file.
# Regenerate after editing config_k8s.yaml:
#   kubectl create configmap plano-config \
#     --from-file=plano_config.yaml=../config_k8s.yaml \
#     -n plano-demo --dry-run=client -o yaml | kubectl apply -f -
#
# NOTE(review): the nesting of the embedded config below was reconstructed
# from a flattened copy — confirm it matches ../config_k8s.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: plano-config
  namespace: plano-demo
data:
  plano_config.yaml: |
    version: v0.4.0
    listeners:
      - type: model
        name: model_listener
        port: 12000
    model_providers:
      - model: openai/gpt-4o-mini
        access_key: $OPENAI_API_KEY
        default: true
      - model: openai/gpt-5.2
        access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: fast_responses
        description: short factual questions, quick lookups, simple summarization, or greetings
        models:
          - openai/gpt-4o-mini
      - name: deep_reasoning
        description: multi-step reasoning, complex analysis, code review, or detailed explanations
        models:
          - openai/gpt-5.2
          - openai/gpt-4o-mini
    routing:
      session_ttl_seconds: 600
      session_cache:
        type: redis
        url: redis://redis.plano-demo.svc.cluster.local:6379
    tracing:
      random_sampling: 100
      trace_arch_internal: true
      opentracing_grpc_endpoint: http://jaeger.plano-demo.svc.cluster.local:4317

View file

@ -0,0 +1,19 @@
---
# EXAMPLE — do NOT apply this file directly.
# Create the real secret with:
#
#   kubectl create secret generic plano-secrets \
#     --from-literal=OPENAI_API_KEY=sk-... \
#     -n plano-demo
#
# Or use the deploy.sh script, which prompts for keys and creates the secret.
#
# If you use a secrets manager (AWS Secrets Manager, GCP Secret Manager, Vault),
# replace this with an ExternalSecret or a CSI driver volume mount instead.
apiVersion: v1
kind: Secret
metadata:
  name: plano-secrets
  namespace: plano-demo
type: Opaque
stringData:
  # Placeholder only — never commit a real key here.
  OPENAI_API_KEY: "sk-replace-me"

View file

@ -0,0 +1,130 @@
---
# Plano Deployment — 2 replicas sharing one Redis instance.
# All replicas are stateless; routing state lives entirely in Redis.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: plano
  namespace: plano-demo
  labels:
    app: plano
spec:
  # NOTE: a HorizontalPodAutoscaler also targets this Deployment. While this
  # field is present, every `kubectl apply` of this manifest resets the replica
  # count to 2, overriding whatever the autoscaler had chosen.
  replicas: 2
  selector:
    matchLabels:
      app: plano
  template:
    metadata:
      labels:
        app: plano
    spec:
      containers:
        - name: plano
          # Local dev: run-local.sh sets this to plano-redis:local and loads it
          # into minikube/kind so no registry is needed.
          # Production: replace with your registry image and use
          # imagePullPolicy: Always.
          image: plano-redis:local
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 12000
              name: llm-gateway
          # Every key in the plano-secrets Secret becomes an env var
          # (e.g. OPENAI_API_KEY).
          envFrom:
            - secretRef:
                name: plano-secrets
          env:
            - name: LOG_LEVEL
              value: "info"
            # Downward API: exposes the pod's own name to the container.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            # Mounted via subPath as a single file. subPath mounts do not
            # receive ConfigMap updates — restart the pods after changing
            # the plano-config ConfigMap.
            - name: plano-config
              mountPath: /app/plano_config.yaml
              subPath: plano_config.yaml
              readOnly: true
          readinessProbe:
            httpGet:
              path: /healthz
              port: 12000
            initialDelaySeconds: 5
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /healthz
              port: 12000
            initialDelaySeconds: 15
            periodSeconds: 30
            failureThreshold: 3
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
      volumes:
        - name: plano-config
          configMap:
            name: plano-config
---
# LoadBalancer Service — exposes Plano externally on port 12000.
# On minikube, run: minikube tunnel
# On kind, use NodePort instead (see comment below).
# On cloud providers (GKE, EKS, AKS), an external IP is assigned automatically.
apiVersion: v1
kind: Service
metadata:
  name: plano
  namespace: plano-demo
  labels:
    app: plano
spec:
  type: LoadBalancer
  selector:
    app: plano
  ports:
    - name: llm-gateway
      port: 12000
      targetPort: 12000
---
# Uncomment and use instead of the LoadBalancer Service above when running on
# kind, or on minikube without `minikube tunnel`:
#
# apiVersion: v1
# kind: Service
# metadata:
#   name: plano
#   namespace: plano-demo
# spec:
#   type: NodePort
#   selector:
#     app: plano
#   ports:
#     - name: llm-gateway
#       port: 12000
#       targetPort: 12000
#       nodePort: 32000
---
# HorizontalPodAutoscaler — scales the plano Deployment between 2 and 5
# replicas, targeting 70% average CPU utilization.
# Demonstrates that new replicas join the existing session state seamlessly.
# Requires a metrics API provider (e.g. metrics-server) in the cluster;
# utilization is computed relative to the container's CPU request.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: plano
  namespace: plano-demo
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: plano
  minReplicas: 2
  maxReplicas: 5
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70

View file

@ -0,0 +1,96 @@
---
# Redis StatefulSet — single-shard, persistence enabled.
# For production, replace with Redis Cluster or a managed service
# (ElastiCache, MemoryStore, etc.).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: plano-demo
  labels:
    app: redis
spec:
  # Governing Service for the StatefulSet (the headless Service named "redis").
  serviceName: redis
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          ports:
            - containerPort: 6379
              name: redis
          # Enable append-only-file persistence and cap the dataset at 256mb,
          # evicting least-recently-used keys (any key) once the cap is hit.
          command:
            - redis-server
            - --appendonly
            - "yes"
            - --maxmemory
            - "256mb"
            - --maxmemory-policy
            - allkeys-lru
          readinessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 15
            periodSeconds: 20
          resources:
            requests:
              memory: "64Mi"
              cpu: "100m"
            limits:
              # Set above the 256mb --maxmemory cap to leave some headroom.
              memory: "320Mi"
              cpu: "500m"
          volumeMounts:
            - name: redis-data
              mountPath: /data
  # One 1Gi PersistentVolumeClaim per replica, mounted at /data above.
  volumeClaimTemplates:
    - metadata:
        name: redis-data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
---
# Headless Service governing the redis StatefulSet.
# Stable DNS name: redis.plano-demo.svc.cluster.local:6379
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: plano-demo
  labels:
    app: redis
spec:
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
  clusterIP: None  # headless — StatefulSet pods get stable DNS
---
# Regular ClusterIP Service for application code.
# Reachable in-namespace as redis://redis-service:6379 (the name "redis"
# resolves to the headless Service instead).
apiVersion: v1
kind: Service
metadata:
  name: redis-service
  namespace: plano-demo
  labels:
    app: redis
spec:
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      targetPort: 6379