From 35c41370d17927f92f5d9076d12756a7b54f39c8 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Tue, 10 Mar 2026 06:29:29 -0700
Subject: [PATCH] add k8s deploy guide

---
 docs/source/resources/deployment.rst | 188 +++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst
index 71452ea3..7b8b0554 100644
--- a/docs/source/resources/deployment.rst
+++ b/docs/source/resources/deployment.rst
@@ -100,6 +100,194 @@ You can also use the CLI with Docker mode:
    planoai up plano_config.yaml --docker
    planoai down --docker
 
+Kubernetes Deployment
+---------------------
+
+Plano runs as a single container in Kubernetes. The container bundles Envoy, WASM plugins, and brightstaff, managed by supervisord internally. Deploy it as a standard Kubernetes Deployment with your ``plano_config.yaml`` mounted via a ConfigMap and API keys injected via a Secret.
+
+.. note::
+   All environment variables referenced in your ``plano_config.yaml`` (e.g. ``$OPENAI_API_KEY``) must be set in the container environment. Use Kubernetes Secrets for API keys.
+
+Step 1: Create the Config
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Store your ``plano_config.yaml`` in a ConfigMap:
+
+.. code-block:: bash
+
+   kubectl create configmap plano-config --from-file=plano_config.yaml=./plano_config.yaml
+
+Step 2: Create API Key Secrets
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Store your LLM provider API keys in a Secret:
+
+.. code-block:: bash
+
+   kubectl create secret generic plano-secrets \
+     --from-literal=OPENAI_API_KEY=sk-... \
+     --from-literal=ANTHROPIC_API_KEY=sk-ant-...
+
+Step 3: Deploy Plano
+~~~~~~~~~~~~~~~~~~~~
+
+Create a ``plano-deployment.yaml``:
+
+.. code-block:: yaml
+
+   apiVersion: apps/v1
+   kind: Deployment
+   metadata:
+     name: plano
+     labels:
+       app: plano
+   spec:
+     replicas: 1
+     selector:
+       matchLabels:
+         app: plano
+     template:
+       metadata:
+         labels:
+           app: plano
+       spec:
+         containers:
+           - name: plano
+             image: katanemo/plano:0.4.11
+             ports:
+               - containerPort: 12000  # LLM gateway (chat completions, model routing)
+                 name: llm-gateway
+             envFrom:
+               - secretRef:
+                   name: plano-secrets
+             env:
+               - name: LOG_LEVEL
+                 value: "info"
+             volumeMounts:
+               - name: plano-config
+                 mountPath: /app/plano_config.yaml
+                 subPath: plano_config.yaml
+                 readOnly: true
+             readinessProbe:
+               httpGet:
+                 path: /healthz
+                 port: 12000
+               initialDelaySeconds: 5
+               periodSeconds: 10
+             livenessProbe:
+               httpGet:
+                 path: /healthz
+                 port: 12000
+               initialDelaySeconds: 10
+               periodSeconds: 30
+             resources:
+               requests:
+                 memory: "256Mi"
+                 cpu: "250m"
+               limits:
+                 memory: "512Mi"
+                 cpu: "1000m"
+         volumes:
+           - name: plano-config
+             configMap:
+               name: plano-config
+   ---
+   apiVersion: v1
+   kind: Service
+   metadata:
+     name: plano
+   spec:
+     selector:
+       app: plano
+     ports:
+       - name: llm-gateway
+         port: 12000
+         targetPort: 12000
+
+Apply it:
+
+.. code-block:: bash
+
+   kubectl apply -f plano-deployment.yaml
+
+Step 4: Verify
+~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+   # Check pod status
+   kubectl get pods -l app=plano
+
+   # Check logs
+   kubectl logs -l app=plano -f
+
+   # Test routing (port-forward for local testing)
+   kubectl port-forward svc/plano 12000:12000
+
+   curl -s -H "Content-Type: application/json" \
+     -d '{"messages":[{"role":"user","content":"tell me a joke"}], "model":"none"}' \
+     http://localhost:12000/v1/chat/completions | jq .model
+
+Updating Configuration
+~~~~~~~~~~~~~~~~~~~~~~
+
+To update ``plano_config.yaml``, replace the ConfigMap and restart the pod:
+
+.. code-block:: bash
+
+   kubectl create configmap plano-config \
+     --from-file=plano_config.yaml=./plano_config.yaml \
+     --dry-run=client -o yaml | kubectl apply -f -
+
+   kubectl rollout restart deployment/plano
+
+Enabling OTEL Tracing
+~~~~~~~~~~~~~~~~~~~~~
+
+Plano emits OpenTelemetry traces for every request — including routing decisions, model selection, and upstream latency. To export traces to an OTEL collector in your cluster, add the ``tracing`` section to your ``plano_config.yaml``:
+
+.. code-block:: yaml
+
+   tracing:
+     opentracing_grpc_endpoint: "http://otel-collector.monitoring:4317"
+     random_sampling: 100       # percentage of requests to trace (1-100)
+     trace_arch_internal: true  # include internal Plano spans
+     span_attributes:
+       header_prefixes:         # capture request headers as span attributes
+         - "x-"
+       static:                  # add static attributes to all spans
+         environment: "production"
+         service: "plano"
+
+Set the ``OTEL_TRACING_GRPC_ENDPOINT`` environment variable or configure it directly in the config. Plano propagates the ``traceparent`` header end-to-end, so traces correlate across your upstream and downstream services.
+
+Environment Variables Reference
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following environment variables can be set on the container:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 50 20
+
+   * - Variable
+     - Description
+     - Default
+   * - ``LOG_LEVEL``
+     - Log verbosity (``debug``, ``info``, ``warn``, ``error``)
+     - ``info``
+   * - ``OPENAI_API_KEY``
+     - OpenAI API key (if referenced in config)
+     -
+   * - ``ANTHROPIC_API_KEY``
+     - Anthropic API key (if referenced in config)
+     -
+   * - ``OTEL_TRACING_GRPC_ENDPOINT``
+     - OTEL collector endpoint for trace export
+     - ``http://localhost:4317``
+
+Any environment variable referenced in ``plano_config.yaml`` with ``$VAR_NAME`` syntax will be substituted at startup. Use Kubernetes Secrets for sensitive values and ConfigMaps or ``env`` entries for non-sensitive configuration.
+
 Runtime Tests
 -------------