add Redis session affinity demos (Docker Compose and Kubernetes)

2026-05-21 13:55:15 +02:00 · 2026-04-09 16:32:40 -07:00 · 2026-04-09 16:32:40 -07:00 · 90810078da
commit 90810078da
parent 50670f843d
20 changed files with 2080 additions and 0 deletions
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/jaeger.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/jaeger.yaml
@ -0,0 +1,56 @@
+---
+# Single-replica Jaeger Deployment exposing the UI and the OTLP ports.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: plano-demo
+  name: jaeger
+  labels:
+    app: jaeger
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: jaeger
+  template:
+    metadata:
+      labels:
+        app: jaeger
+    spec:
+      containers:
+        - name: jaeger
+          image: jaegertracing/jaeger:2.3.0
+          resources:
+            limits:
+              cpu: 500m
+              memory: 512Mi
+            requests:
+              cpu: 100m
+              memory: 128Mi
+          ports:
+            # UI
+            - containerPort: 16686
+            # OTLP gRPC
+            - containerPort: 4317
+            # OTLP HTTP
+            - containerPort: 4318
+---
+# Service in front of the Jaeger pod (no `type` set): UI plus OTLP gRPC/HTTP.
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: plano-demo
+  name: jaeger
+  labels:
+    app: jaeger
+spec:
+  ports:
+    # Web UI
+    - port: 16686
+      targetPort: 16686
+      name: ui
+    # OTLP over gRPC
+    - port: 4317
+      targetPort: 4317
+      name: otlp-grpc
+    # OTLP over HTTP
+    - port: 4318
+      targetPort: 4318
+      name: otlp-http
+  selector:
+    app: jaeger
+---
+# No NodePort is defined for Jaeger; reach the UI from your laptop with a port-forward:
+#   kubectl port-forward svc/jaeger 16686:16686 -n plano-demo
+# then open http://localhost:16686
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/namespace.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/namespace.yaml
@ -0,0 +1,6 @@
+---
+# Namespace used by the demo manifests (Jaeger, Plano, Redis).
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    app.kubernetes.io/part-of: plano-session-affinity-demo
+  name: plano-demo
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/plano-config.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/plano-config.yaml
@ -0,0 +1,50 @@
+---
+# ConfigMap wrapping the Plano config file.
+# Regenerate after editing config_k8s.yaml:
+#   kubectl create configmap plano-config \
+#     --from-file=plano_config.yaml=../config_k8s.yaml \
+#     -n plano-demo --dry-run=client -o yaml | kubectl apply -f -
+#
+# The plano Deployment mounts the plano_config.yaml key at /app/plano_config.yaml
+# using subPath. Per the Kubernetes ConfigMap docs, subPath mounts do not receive
+# ConfigMap updates, so restart the pods after regenerating, e.g.:
+#   kubectl rollout restart deployment/plano -n plano-demo
+#
+# Cross-references inside the embedded config:
+#   - $OPENAI_API_KEY: presumably resolved from the container environment; the
+#     plano Deployment loads env vars from the plano-secrets Secret (envFrom).
+#   - session_cache.url points at the `redis` Service in plano-demo, port 6379.
+#   - opentracing_grpc_endpoint points at the `jaeger` Service, port 4317
+#     (the port named otlp-grpc).
+#
+# Do not add comments inside the block scalar below unless they should also be
+# part of the file Plano reads: everything under `plano_config.yaml: |` is data.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: plano-config
+  namespace: plano-demo
+data:
+  plano_config.yaml: |
+    version: v0.4.0
+
+    listeners:
+      - type: model
+        name: model_listener
+        port: 12000
+
+    model_providers:
+      - model: openai/gpt-4o-mini
+        access_key: $OPENAI_API_KEY
+        default: true
+
+      - model: openai/gpt-5.2
+        access_key: $OPENAI_API_KEY
+
+    routing_preferences:
+      - name: fast_responses
+        description: short factual questions, quick lookups, simple summarization, or greetings
+        models:
+          - openai/gpt-4o-mini
+
+      - name: deep_reasoning
+        description: multi-step reasoning, complex analysis, code review, or detailed explanations
+        models:
+          - openai/gpt-5.2
+          - openai/gpt-4o-mini
+
+    routing:
+      session_ttl_seconds: 600
+      session_cache:
+        type: redis
+        url: redis://redis.plano-demo.svc.cluster.local:6379
+
+    tracing:
+      random_sampling: 100
+      trace_arch_internal: true
+      opentracing_grpc_endpoint: http://jaeger.plano-demo.svc.cluster.local:4317
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/plano-secrets.example.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/plano-secrets.example.yaml
@ -0,0 +1,19 @@
+# TEMPLATE ONLY — do NOT `kubectl apply` this file as-is.
+#
+# Create the real Secret imperatively instead:
+#
+#   kubectl create secret generic plano-secrets \
+#     --from-literal=OPENAI_API_KEY=sk-... \
+#     -n plano-demo
+#
+# Alternatively, run deploy.sh: it prompts for keys and creates the Secret.
+#
+# When a secrets manager is in use (AWS Secrets Manager, GCP Secret Manager,
+# Vault), replace this manifest with an ExternalSecret or a CSI driver volume
+# mount.
+apiVersion: v1
+kind: Secret
+type: Opaque
+metadata:
+  namespace: plano-demo
+  name: plano-secrets
+stringData:
+  # Placeholder value — replace before use.
+  OPENAI_API_KEY: 'sk-replace-me'
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/plano.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/plano.yaml
@ -0,0 +1,130 @@
+---
+# Plano Deployment — stateless replicas sharing one Redis instance.
+# All replicas are stateless; routing state lives entirely in Redis.
+#
+# spec.replicas is deliberately not set: the HorizontalPodAutoscaler defined
+# later in this file targets this Deployment (minReplicas: 2, maxReplicas: 5)
+# and owns the replica count. A hard-coded replicas value would be re-applied
+# on every `kubectl apply`, resetting whatever the autoscaler had chosen.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: plano
+  namespace: plano-demo
+  labels:
+    app: plano
+spec:
+  selector:
+    matchLabels:
+      app: plano
+  template:
+    metadata:
+      labels:
+        app: plano
+    spec:
+      containers:
+        - name: plano
+          # Local dev: run-local.sh sets this to plano-redis:local and loads it
+          # into minikube/kind so no registry is needed.
+          # Production: replace with your registry image and use imagePullPolicy: Always.
+          image: plano-redis:local
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 12000
+              name: llm-gateway
+          # Every key of the plano-secrets Secret becomes an environment variable
+          # (e.g. OPENAI_API_KEY).
+          envFrom:
+            - secretRef:
+                name: plano-secrets
+          env:
+            - name: LOG_LEVEL
+              value: "info"
+            # Downward API: exposes this pod's own name to the container.
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+          volumeMounts:
+            # Mounts only the plano_config.yaml key as a single file.
+            # NOTE: subPath mounts do not receive ConfigMap updates; restart the
+            # pods after changing the plano-config ConfigMap.
+            - name: plano-config
+              mountPath: /app/plano_config.yaml
+              subPath: plano_config.yaml
+              readOnly: true
+          # Readiness: first check after 5s, then every 10s; 3 failures mark the
+          # pod unready.
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: 12000
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            failureThreshold: 3
+          # Liveness: same endpoint, checked less often; 3 failures restart the
+          # container.
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 12000
+            initialDelaySeconds: 15
+            periodSeconds: 30
+            failureThreshold: 3
+          # The CPU request is also the baseline for the HPA's CPU utilization target.
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "250m"
+            limits:
+              memory: "1Gi"
+              cpu: "1000m"
+      volumes:
+        - name: plano-config
+          configMap:
+            name: plano-config
+---
+# External entry point for Plano (type: LoadBalancer).
+#   - minikube: run `minikube tunnel`
+#   - kind: use the NodePort variant instead (see comment below)
+#   - cloud providers (GKE, EKS, AKS): an external IP is assigned automatically
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: plano-demo
+  name: plano
+  labels:
+    app: plano
+spec:
+  type: LoadBalancer
+  ports:
+    - port: 12000
+      targetPort: 12000
+      name: llm-gateway
+  selector:
+    app: plano
+---
+# Uncomment and use instead of the LoadBalancer above when running on kind/minikube
+# without tunnel:
+#
+# apiVersion: v1
+# kind: Service
+# metadata:
+#   name: plano
+#   namespace: plano-demo
+# spec:
+#   type: NodePort
+#   selector:
+#     app: plano
+#   ports:
+#     - name: llm-gateway
+#       port: 12000
+#       targetPort: 12000
+#       nodePort: 32000
+---
+# HorizontalPodAutoscaler for the plano Deployment: keeps between 2 and 5
+# replicas, targeting 70% average CPU utilization.
+# Demonstrates that new replicas join the existing session state seamlessly.
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  namespace: plano-demo
+  name: plano
+spec:
+  minReplicas: 2
+  maxReplicas: 5
+  scaleTargetRef:
+    kind: Deployment
+    name: plano
+    apiVersion: apps/v1
+  metrics:
+    - resource:
+        name: cpu
+        target:
+          averageUtilization: 70
+          type: Utilization
+      type: Resource
--- a/demos/llm_routing/session_affinity_redis_k8s/k8s/redis.yaml
+++ b/demos/llm_routing/session_affinity_redis_k8s/k8s/redis.yaml
@ -0,0 +1,96 @@
+---
+# Redis StatefulSet — one replica, append-only persistence on a 1Gi volume.
+# For production, use Redis Cluster or a managed service
+# (ElastiCache, MemoryStore, etc.) instead.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  namespace: plano-demo
+  name: redis
+  labels:
+    app: redis
+spec:
+  replicas: 1
+  serviceName: redis
+  selector:
+    matchLabels:
+      app: redis
+  volumeClaimTemplates:
+    - metadata:
+        name: redis-data
+      spec:
+        accessModes:
+          - ReadWriteOnce
+        resources:
+          requests:
+            storage: 1Gi
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      containers:
+        - name: redis
+          image: redis:7-alpine
+          # Append-only file on; memory capped at 256mb with allkeys-lru eviction.
+          command:
+            - redis-server
+            - --appendonly
+            - 'yes'
+            - --maxmemory
+            - 256mb
+            - --maxmemory-policy
+            - allkeys-lru
+          ports:
+            - name: redis
+              containerPort: 6379
+          volumeMounts:
+            - mountPath: /data
+              name: redis-data
+          resources:
+            limits:
+              cpu: 500m
+              memory: 320Mi
+            requests:
+              cpu: 100m
+              memory: 64Mi
+          readinessProbe:
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            exec:
+              command:
+                - redis-cli
+                - ping
+          livenessProbe:
+            initialDelaySeconds: 15
+            periodSeconds: 20
+            exec:
+              command:
+                - redis-cli
+                - ping
+---
+# Headless Service named by the StatefulSet's serviceName.
+# Stable DNS name: redis.plano-demo.svc.cluster.local:6379
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: plano-demo
+  name: redis
+  labels:
+    app: redis
+spec:
+  # headless — StatefulSet pods get stable DNS
+  clusterIP: None
+  ports:
+    - port: 6379
+      targetPort: 6379
+      name: redis
+  selector:
+    app: redis
+---
+# Regular ClusterIP Service for application code (redis://redis-service:6379)
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: plano-demo
+  # Reachable inside the namespace as redis-service:6379.
+  name: redis-service
+  labels:
+    app: redis
+spec:
+  ports:
+    - port: 6379
+      targetPort: 6379
+      name: redis
+  selector:
+    app: redis