diff --git a/deploy/helm/dograh/.gitignore b/deploy/helm/dograh/.gitignore new file mode 100644 index 0000000..a097806 --- /dev/null +++ b/deploy/helm/dograh/.gitignore @@ -0,0 +1,3 @@ +# Subchart tarballs are fetched by `helm dependency build` from Chart.lock. +# Tracked in Chart.lock; not in git. +charts/ diff --git a/deploy/helm/dograh/Chart.lock b/deploy/helm/dograh/Chart.lock new file mode 100644 index 0000000..1097715 --- /dev/null +++ b/deploy/helm/dograh/Chart.lock @@ -0,0 +1,12 @@ +dependencies: +- name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.7.27 +- name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: 20.13.4 +- name: minio + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.0.10 +digest: sha256:8a2d769b21e882239fc847a9638758131f388dbd12feabd387671e9a336a2615 +generated: "2026-05-25T19:49:15.193741+05:30" diff --git a/deploy/helm/dograh/Chart.yaml b/deploy/helm/dograh/Chart.yaml new file mode 100644 index 0000000..91220b3 --- /dev/null +++ b/deploy/helm/dograh/Chart.yaml @@ -0,0 +1,47 @@ +apiVersion: v2 +name: dograh +description: | + Dograh — open-source voice AI platform. Deploys the FastAPI backend + (decomposed into web, ARQ worker, ARI manager singleton, and campaign + orchestrator singleton), the Next.js UI, and coturn for WebRTC media + relay. Optional bundled PostgreSQL, Redis, and MinIO via subcharts. +type: application + +# version: chart version. Bump for any chart change. +# appVersion: Dograh application version. Tracks the image tag. +version: 0.1.0 +appVersion: "0.1.0" + +kubeVersion: ">=1.28.0-0" + +keywords: + - voice-ai + - webrtc + - telephony + - fastapi + +home: https://dograh.com +sources: + - https://github.com/dograh-hq/dograh + +maintainers: + - name: Dograh + +# Bundled stateful dependencies. Each is gated by its mode toggle in +# values.yaml so external-managed deployments do not pull these charts. +dependencies: + - name: postgresql + version: "16.x.x" + repository: oci://registry-1.docker.io/bitnamicharts + condition: postgresql.enabled + # Aliased to redisInternal so the dograh-level `redis.mode` toggle does not + # collide with the subchart's values namespace. + - name: redis + alias: redisInternal + version: "20.x.x" + repository: oci://registry-1.docker.io/bitnamicharts + condition: redisInternal.enabled + - name: minio + version: "16.x.x" + repository: oci://registry-1.docker.io/bitnamicharts + condition: minio.enabled diff --git a/deploy/helm/dograh/README.md b/deploy/helm/dograh/README.md new file mode 100644 index 0000000..760bf8e --- /dev/null +++ b/deploy/helm/dograh/README.md @@ -0,0 +1,185 @@ +# Dograh Helm chart + +Deploys Dograh on Kubernetes with decomposed backend workloads (web, +ARQ workers, telephony singleton, campaign singleton), Next.js UI, and +coturn for WebRTC media relay. Implements the architecture defined in +`HELM_DEPLOYMENT_PLAN.md` at the repo root. + +## Status + +v1, alpha. Validated with `helm lint` and `helm template`. Not yet +exercised against a live cluster. + +## Quick start + +```bash +cd deploy/helm/dograh + +# Pull in Bitnami subcharts (network required) +helm dependency update + +# Install with defaults (all internal deps, Gateway API exposure) +helm install dograh . \ + --set secrets.ossJwtSecret="$(openssl rand -hex 32)" \ + --set secrets.turnSecret="$(openssl rand -hex 32)" \ + --set exposure.gatewayApi.gatewayClassName=istio +``` + +See `examples/values-single-node.yaml`, `examples/values-managed.yaml`, +and `examples/values-aws.yaml` for topology-specific overrides. + +## Architecture summary + +| Workload | Replicas | Strategy | Notes | +|------------------------------|-------------|-----------------|-------| +| `dograh-web` | 2 (HPA opt) | RollingUpdate | Long-lived WS, graceful drain | +| `dograh-arq-worker` | 1 (knob) | RollingUpdate | Stateless | +| `dograh-ari-manager` | **1 fixed** | **Recreate** | Telephony singleton | +| `dograh-campaign-orchestrator` | **1 fixed** | **Recreate** | Campaign singleton (in-memory locks) | +| `dograh-ui` | 2 | RollingUpdate | Next.js SSR | +| `dograh-coturn` | 1 | Recreate | LoadBalancer Service, port-pinned | + +HTTP traffic: Gateway API (default) or Ingress (fallback). +TURN traffic: dedicated L4 Service of type `LoadBalancer`. + +## Decisions log + +These are choices the chart made where `HELM_DEPLOYMENT_PLAN.md` was +silent. Each is exposed in `values.yaml` for operator override. + +- **terminationGracePeriodSeconds for web: 600s.** Covers a 10-minute + call; tune to your call-length distribution. +- **preStop sleep: 15s.** Conservative window for the gateway/ingress + to observe pod NotReady and stop dispatching new connections. +- **Liveness probes on singletons: `exec` (`pgrep`).** No HTTP endpoint + exists on ari-manager / campaign-orchestrator; process-alive check is + the simplest correct signal. +- **HPA on web: CPU/memory, disabled by default.** Plan recommends HPA + but CPU/memory is a poor signal for WS workloads. Default + `autoscaling.web.enabled=false`; flip on with a knowing eye and plan + to replace with a connection-count metric. +- **Singleton replica counts: hard-coded.** No `replicaCount` knob + exposed on ari-manager / campaign-orchestrator. Prevents accidental + `kubectl scale` corrupting in-memory dedup state. +- **MinIO browser exposure: shared host, path prefix `/voice-audio/`.** + Mirrors current nginx behavior. Operators wanting a separate + hostname can override by editing `httproute-minio.yaml` or + `ingress.yaml` post-install. +- **NetworkPolicy: not in v1.** TODO below. +- **ServiceMonitor / Prometheus: not in v1.** TODO below. +- **TURN TLS (turns://): not in v1.** Original docker-compose exposed + port 5349 but never wired certs. Chart scopes v1 to plain TURN. + +## `/tmp` audit (review fix #6) + +The current docker-compose mounts a `shared-tmp` volume across all +logical services so file handoffs between processes Just Work. In +Kubernetes with separated pods this is broken by default. + +**Findings:** + +| File | Process | Behavior | Cross-pod? | +|------|---------|----------|------------| +| `api/services/pipecat/event_handlers.py` (lines 364–383) | **web** | Writes WAV + transcript via `NamedTemporaryFile`, then `enqueue_job(...)` to ARQ with the local path | **YES — broken** | +| `api/tasks/s3_upload.py` | **arq-worker** | Receives `temp_file_path`, `os.path.exists`, uploads, deletes | **reads from web's path** | +| `api/services/pipecat/in_memory_buffers.py` | web | Writes tempfiles consumed in the same process | No | +| `api/services/pipecat/audio_file_cache.py` | web | Per-process cache | No | +| `api/tasks/knowledge_base_processing.py` | arq-worker | Writes + reads in the same task | No | + +**Mitigation in this chart:** `sharedTmp.enabled` flag in `values.yaml`. +When enabled, the chart creates a `ReadWriteMany` PVC mounted into +both `dograh-web` and `dograh-arq-worker` at +`/tmp/dograh-shared/`. Default is `enabled: false` because most +cloud-default storage classes are RWO; enabling it on RWO will fail +PVC binding. + +**If your cluster lacks an RWX storage class** (most cloud defaults are +RWO), you MUST either: +- provision an RWX class (EFS, Azure Files, Longhorn-RWX, Rook-Ceph) and + set `sharedTmp.storageClassName`, or +- complete the long-term fix in TODOs below before splitting web/worker. + +## Open TODOs (deferred from v1) + +- **Refactor `event_handlers.py` to handle uploads in-web.** Upload to + object storage from the web process and pass the resulting storage + key (not a local path) to the ARQ job. This removes the need for a + shared `/tmp` PVC entirely. +- **Leader election for singletons.** Adopt Kubernetes lease-based + leader election so `ari-manager` / `campaign-orchestrator` can run + HA. Until then, replicas remain hard-coded to 1. +- **Connection-count HPA metric.** Expose active WS sessions per pod + (Prometheus or KEDA) and replace CPU/memory HPA target. +- **NetworkPolicy.** Add default-deny + explicit egress to Postgres, + Redis, MinIO/S3, and (for ari-manager) Asterisk. +- **ServiceMonitor.** First-class Prometheus integration once + observability stack is selected. +- **TURN TLS (turns://).** Wire certificate paths through coturn config + and document the cert-manager pattern. +- **MinIO public route via separate hostname.** Make `/voice-audio/` + path-prefix the default but allow operators to opt into a dedicated + hostname. +- **KEDA for ARQ workers.** When a queue-depth metric is available, + switch ARQ from fixed replicas to KEDA-driven scaling. + +## Validation + +```bash +cd deploy/helm/dograh + +helm lint . +helm template test-release . > /tmp/render-default.yaml +helm template test-release . -f examples/values-single-node.yaml > /tmp/render-single.yaml +helm template test-release . -f examples/values-managed.yaml > /tmp/render-managed.yaml +helm template test-release . -f examples/values-aws.yaml > /tmp/render-aws.yaml +``` + +Spot-check expectations: +- `Deployment/-ari-manager` has `replicas: 1` and + `strategy.type: Recreate`. +- `Deployment/-campaign-orchestrator` has `replicas: 1` and + `strategy.type: Recreate`. +- `Deployment/-web` has `terminationGracePeriodSeconds: 600` + and a `lifecycle.preStop` exec hook. +- Liveness probe on ari-manager / campaign-orchestrator uses `exec`, + not `httpGet`. + +## Layout + +``` +deploy/helm/dograh/ +├── Chart.yaml +├── values.yaml # heavily commented +├── values.schema.json # enforces mode enums +├── README.md # this file +├── examples/ +│ ├── values-single-node.yaml +│ ├── values-managed.yaml +│ └── values-aws.yaml +└── templates/ + ├── _helpers.tpl + ├── NOTES.txt + ├── serviceaccount.yaml + ├── configmap.yaml + ├── secret.yaml + ├── migrate-job.yaml + ├── shared-tmp-pvc.yaml + ├── web-deployment.yaml + ├── web-service.yaml + ├── web-hpa.yaml + ├── web-pdb.yaml + ├── arq-worker-deployment.yaml + ├── ari-manager-deployment.yaml + ├── campaign-orchestrator-deployment.yaml + ├── ui-deployment.yaml + ├── ui-service.yaml + ├── ui-pdb.yaml + ├── coturn-deployment.yaml + ├── coturn-service.yaml + ├── coturn-configmap.yaml + ├── gateway.yaml + ├── httproute-api.yaml + ├── httproute-ui.yaml + ├── httproute-minio.yaml + └── ingress.yaml +``` diff --git a/deploy/helm/dograh/examples/values-aws.yaml b/deploy/helm/dograh/examples/values-aws.yaml new file mode 100644 index 0000000..69eb3c7 --- /dev/null +++ b/deploy/helm/dograh/examples/values-aws.yaml @@ -0,0 +1,71 @@ +# AWS EKS — uses ALB (via AWS Gateway API controller) for HTTP and NLB +# for coturn. Assumes: +# - aws-load-balancer-controller is installed +# - aws Gateway API controller is installed (gateway.networking.k8s.io) +# - IRSA configured for the dograh ServiceAccount when using S3 +# +# REQUIRED OVERRIDES: +# --set secrets.ossJwtSecret=$(openssl rand -hex 32) +# --set secrets.turnSecret=$(openssl rand -hex 32) +# --set exposure.gatewayApi.listenerHostname=dograh.example.com +# --set storage.s3.bucket=... +# +# After install, retrieve coturn NLB address and re-upgrade: +# LB_IP=$(kubectl get svc dograh-coturn -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') +# helm upgrade dograh . --reuse-values --set coturn.externalIp=$LB_IP --set config.turnHost=$LB_IP + +database: + mode: external # use RDS Postgres +redis: + mode: external # use ElastiCache Redis +storage: + mode: s3 + s3: + region: us-east-1 + bucket: "" # set via --set + +exposure: + mode: gatewayApi + gatewayApi: + createGateway: true + gatewayClassName: aws-alb + listenerHostname: "" # set via --set + + ingress: + tls: + enabled: true + secretName: "" # cert ARN via ALB annotations instead; see below + +# coturn on NLB. AWS Gateway API only handles L7; coturn keeps a plain +# Service of type LoadBalancer with NLB annotations. +coturn: + service: + type: LoadBalancer + externalTrafficPolicy: Local + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + +# IRSA: bind a role with S3 permissions to the dograh ServiceAccount. +serviceAccount: + create: true + annotations: + eks.amazonaws.com/role-arn: "" # set via --set + +web: + replicaCount: 3 + +autoscaling: + web: + enabled: true + minReplicas: 3 + maxReplicas: 12 + +# Disable bundled deps. +postgresql: + enabled: false +redisInternal: + enabled: false +minio: + enabled: false diff --git a/deploy/helm/dograh/examples/values-managed.yaml b/deploy/helm/dograh/examples/values-managed.yaml new file mode 100644 index 0000000..4f9084e --- /dev/null +++ b/deploy/helm/dograh/examples/values-managed.yaml @@ -0,0 +1,71 @@ +# Production: external Postgres + Redis, S3 storage, Gateway API exposure. +# Suitable for managed Kubernetes (EKS, GKE, AKS) with managed DBs. +# +# REQUIRED OVERRIDES at install time: +# --set secrets.databaseUrl=... +# --set secrets.redisUrl=... +# --set secrets.ossJwtSecret=$(openssl rand -hex 32) +# --set secrets.turnSecret=$(openssl rand -hex 32) +# --set exposure.gatewayApi.gatewayClassName= +# --set exposure.gatewayApi.listenerHostname=dograh.example.com + +database: + mode: external +redis: + mode: external +storage: + mode: s3 + s3: + region: us-east-1 + bucket: dograh-voice-audio + publicEndpoint: https://dograh-voice-audio.s3.amazonaws.com +exposure: + mode: gatewayApi + gatewayApi: + createGateway: true + # gatewayClassName MUST be set via --set or override file. + listenerHostname: "" # set to your hostname + ingress: + tls: + enabled: true + secretName: dograh-tls + +config: + environment: production + logLevel: INFO + enableAwsS3: true + +web: + replicaCount: 3 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "4" + memory: 4Gi + +workers: + replicaCount: 2 + resources: + requests: + cpu: 250m + memory: 512Mi + limits: + cpu: "2" + memory: 2Gi + +autoscaling: + web: + enabled: true + minReplicas: 3 + maxReplicas: 12 + targetCPUUtilizationPercentage: 70 + +# Disable bundled deps when using external services. +postgresql: + enabled: false +redisInternal: + enabled: false +minio: + enabled: false diff --git a/deploy/helm/dograh/examples/values-single-node.yaml b/deploy/helm/dograh/examples/values-single-node.yaml new file mode 100644 index 0000000..8f0c346 --- /dev/null +++ b/deploy/helm/dograh/examples/values-single-node.yaml @@ -0,0 +1,61 @@ +# Single-node deployment (k3s, minikube, single VM). +# All stateful deps bundled in-cluster, Ingress for HTTP, smaller resources. + +database: + mode: internal +redis: + mode: internal +storage: + mode: internalMinio +exposure: + mode: ingress + ingress: + className: nginx + host: dograh.local + +config: + environment: production + logLevel: INFO + +web: + replicaCount: 1 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: "1" + memory: 1Gi + pdb: + enabled: false + +workers: + replicaCount: 1 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + +ui: + replicaCount: 1 + pdb: + enabled: false + +autoscaling: + web: + enabled: false + +postgresql: + primary: + persistence: + size: 2Gi +redisInternal: + master: + persistence: + size: 1Gi +minio: + persistence: + size: 5Gi diff --git a/deploy/helm/dograh/templates/NOTES.txt b/deploy/helm/dograh/templates/NOTES.txt new file mode 100644 index 0000000..016c617 --- /dev/null +++ b/deploy/helm/dograh/templates/NOTES.txt @@ -0,0 +1,92 @@ +Dograh has been installed. + +Release: {{ .Release.Name }} +Namespace: {{ .Release.Namespace }} +Chart: {{ .Chart.Name }}-{{ .Chart.Version }} + +=== HTTP exposure ({{ .Values.exposure.mode }}) === +{{- if eq .Values.exposure.mode "gatewayApi" }} +{{- if .Values.exposure.gatewayApi.createGateway }} +A Gateway named {{ include "dograh.fullname" . }} was created with class +"{{ .Values.exposure.gatewayApi.gatewayClassName }}". Find its address with: + + kubectl get gateway {{ include "dograh.fullname" . }} -n {{ .Release.Namespace }} \ + -o jsonpath='{.status.addresses[0].value}' +{{- else }} +HTTPRoutes were attached to existing Gateway(s): +{{- range .Values.exposure.gatewayApi.parentRefs }} + - {{ .name }}{{ if .namespace }}/{{ .namespace }}{{ end }} +{{- end }} +{{- end }} +{{- else }} +Ingress class: "{{ .Values.exposure.ingress.className }}" +Host: {{ default "(unset — set exposure.ingress.host)" .Values.exposure.ingress.host }} +{{- end }} + +=== TURN (coturn) === +{{- if .Values.coturn.enabled }} +The coturn Service is type LoadBalancer. Find its external address: + + kubectl get svc {{ include "dograh.coturn.fullname" . }} -n {{ .Release.Namespace }} \ + -o jsonpath='{.status.loadBalancer.ingress[0].ip}{.status.loadBalancer.ingress[0].hostname}' + +IMPORTANT — chicken-and-egg with coturn.externalIp: +coturn announces an external IP in ICE candidates. The LoadBalancer IP is +typically not known until after install. Once the LB has an address: + + helm upgrade {{ .Release.Name }} . \ + --reuse-values \ + --set coturn.externalIp= \ + --set config.turnHost= + +Until then, WebRTC media will be impaired in relay-only scenarios. + +NLB listener-quota note (AWS): the default coturn relay range is +{{ .Values.coturn.relayPortRange.min }}-{{ .Values.coturn.relayPortRange.max }}, which is +{{ sub (int .Values.coturn.relayPortRange.max) (int .Values.coturn.relayPortRange.min) | add1 }} ports. +AWS NLB default quota is 50 listeners per LB. Widening the range requires either +a quota increase or splitting TURN across multiple LBs. +{{- else }} +coturn is disabled. Set coturn.enabled=true to deploy media relay. +{{- end }} + +=== Migrations === +{{- if .Values.migrate.enabled }} +Alembic migrations run as a pre-install / pre-upgrade hook. Inspect with: + + kubectl logs job/{{ include "dograh.migrate.fullname" . }} -n {{ .Release.Namespace }} +{{- end }} + +=== /tmp shared volume === +{{- if .Values.sharedTmp.enabled }} +sharedTmp.enabled=true — web and arq-worker pods mount a ReadWriteMany PVC +at {{ .Values.sharedTmp.mountPath }} so end-of-call uploads survive pod boundaries. +{{- else }} +WARNING: sharedTmp.enabled=false. End-of-call uploads (event_handlers.py → +ARQ s3_upload) hand off via /tmp paths. With separated web and worker pods +this WILL fail unless you have an RWX storage class configured. + +See deploy/helm/dograh/README.md "/tmp audit" section. +{{- end }} + +=== Singletons === +ari-manager and campaign-orchestrator run with replicas=1 and +strategy=Recreate by design. Do NOT scale these via kubectl scale — +they use in-memory locks and would silently corrupt with >1 replica. + +=== Required overrides === +{{- if eq .Values.secrets.ossJwtSecret "ChangeMeInProduction" }} +WARNING: secrets.ossJwtSecret is still the chart default. Override before +running in any non-dev environment: + + --set secrets.ossJwtSecret="$(openssl rand -hex 32)" +{{- end }} +{{- if and (eq .Values.database.mode "external") (empty .Values.secrets.databaseUrl) }} +ERROR: database.mode=external but secrets.databaseUrl is empty. +{{- end }} +{{- if and (eq .Values.redis.mode "external") (empty .Values.secrets.redisUrl) }} +ERROR: redis.mode=external but secrets.redisUrl is empty. +{{- end }} + +For troubleshooting and topology examples see +deploy/helm/dograh/README.md and examples/. diff --git a/deploy/helm/dograh/templates/_helpers.tpl b/deploy/helm/dograh/templates/_helpers.tpl new file mode 100644 index 0000000..2d497f6 --- /dev/null +++ b/deploy/helm/dograh/templates/_helpers.tpl @@ -0,0 +1,162 @@ +{{/* +Common helpers. +*/}} + +{{- define "dograh.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "dograh.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{- define "dograh.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "dograh.labels" -}} +helm.sh/chart: {{ include "dograh.chart" . }} +{{ include "dograh.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "dograh.selectorLabels" -}} +app.kubernetes.io/name: {{ include "dograh.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "dograh.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "dograh.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Component-specific names. +*/}} +{{- define "dograh.web.fullname" -}}{{ include "dograh.fullname" . }}-web{{- end }} +{{- define "dograh.arqWorker.fullname" -}}{{ include "dograh.fullname" . }}-arq-worker{{- end }} +{{- define "dograh.ariManager.fullname" -}}{{ include "dograh.fullname" . }}-ari-manager{{- end }} +{{- define "dograh.campaignOrchestrator.fullname" -}}{{ include "dograh.fullname" . }}-campaign-orchestrator{{- end }} +{{- define "dograh.ui.fullname" -}}{{ include "dograh.fullname" . }}-ui{{- end }} +{{- define "dograh.coturn.fullname" -}}{{ include "dograh.fullname" . }}-coturn{{- end }} +{{- define "dograh.migrate.fullname" -}}{{ include "dograh.fullname" . }}-migrate{{- end }} + +{{- define "dograh.configMapName" -}}{{ include "dograh.fullname" . }}-config{{- end }} +{{- define "dograh.secretName" -}} +{{- if .Values.secrets.existingSecret -}} +{{- .Values.secrets.existingSecret -}} +{{- else -}} +{{- include "dograh.fullname" . }}-secret +{{- end -}} +{{- end }} + +{{/* +Image reference. +*/}} +{{- define "dograh.image" -}} +{{- $registry := .Values.image.registry | default "docker.io" -}} +{{- printf "%s/%s:%s" $registry .Values.image.repository .Values.image.tag -}} +{{- end }} + +{{- define "dograh.ui.image" -}} +{{- $registry := .Values.ui.image.registry | default "docker.io" -}} +{{- printf "%s/%s:%s" $registry .Values.ui.image.repository .Values.ui.image.tag -}} +{{- end }} + +{{- define "dograh.coturn.image" -}} +{{- $registry := .Values.coturn.image.registry | default "docker.io" -}} +{{- printf "%s/%s:%s" $registry .Values.coturn.image.repository .Values.coturn.image.tag -}} +{{- end }} + +{{/* +Subchart enabling — flips top-level chart-dependency `enabled` flags from mode. +Called from each template via `include "dograh.deps.resolved" .` (no-op output). +*/}} +{{- define "dograh.deps.resolved" -}} +{{- /* compute whether internal deps are enabled */ -}} +{{- end }} + +{{/* +In-cluster service references for internal deps. +*/}} +{{- define "dograh.postgresHost" -}}{{ .Release.Name }}-postgresql{{- end }} +{{- define "dograh.redisHost" -}}{{ .Release.Name }}-redisinternal-master{{- end }} +{{- define "dograh.minioHost" -}}{{ .Release.Name }}-minio{{- end }} + +{{/* +Default DATABASE_URL when database.mode=internal. +Bitnami Postgres exposes the password as -postgresql secret key +`postgres-password` or `password`. The chart pulls it via envFrom on the +generated secret; for clarity we still need the URL string. Auth username +defaults to `dograh` (see values.postgresql.auth.username). +*/}} +{{- define "dograh.databaseUrl" -}} +{{- if eq .Values.database.mode "internal" -}} +postgresql+asyncpg://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ include "dograh.postgresHost" . }}:5432/{{ .Values.postgresql.auth.database }} +{{- else -}} +$(DATABASE_URL) +{{- end -}} +{{- end }} + +{{- define "dograh.redisUrl" -}} +{{- if eq .Values.redis.mode "internal" -}} +redis://:$(REDIS_PASSWORD)@{{ include "dograh.redisHost" . }}:6379 +{{- else -}} +$(REDIS_URL) +{{- end -}} +{{- end }} + +{{/* +Common env block for backend workloads (web, arq, singletons, migrate). +References the ConfigMap + Secret via envFrom. DATABASE_URL and REDIS_URL +are added inline because they may need composition from subchart secrets. +*/}} +{{- define "dograh.backendEnvFrom" -}} +- configMapRef: + name: {{ include "dograh.configMapName" . }} +- secretRef: + name: {{ include "dograh.secretName" . }} +{{- if eq .Values.database.mode "internal" }} +- secretRef: + name: {{ .Release.Name }}-postgresql + optional: true +{{- end }} +{{- if eq .Values.redis.mode "internal" }} +- secretRef: + name: {{ .Release.Name }}-redisinternal + optional: true +{{- end }} +{{- end }} + +{{/* +Volume mounts for the shared-tmp PVC when enabled. +*/}} +{{- define "dograh.sharedTmpVolumeMounts" -}} +{{- if .Values.sharedTmp.enabled }} +- name: shared-tmp + mountPath: {{ .Values.sharedTmp.mountPath }} +{{- end }} +{{- end }} + +{{- define "dograh.sharedTmpVolumes" -}} +{{- if .Values.sharedTmp.enabled }} +- name: shared-tmp + persistentVolumeClaim: + claimName: {{ include "dograh.fullname" . }}-shared-tmp +{{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/ari-manager-deployment.yaml b/deploy/helm/dograh/templates/ari-manager-deployment.yaml new file mode 100644 index 0000000..80b668e --- /dev/null +++ b/deploy/helm/dograh/templates/ari-manager-deployment.yaml @@ -0,0 +1,81 @@ +{{- if .Values.ariManager.enabled }} +# SINGLETON — replicas hard-coded to 1, strategy: Recreate. +# ari-manager maintains an outbound WebSocket to Asterisk and is the +# canonical receiver of ARI events. Running >1 replica produces duplicate +# event handling. There is NO replica knob on purpose. Add leader +# election before relaxing this constraint. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.ariManager.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: ari-manager +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ari-manager + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ari-manager + {{- with .Values.ariManager.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: ari-manager + image: {{ include "dograh.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./scripts/run_ari_manager.sh"] + envFrom: + {{- include "dograh.backendEnvFrom" . | nindent 12 }} + env: + - name: DATABASE_URL + value: {{ include "dograh.databaseUrl" . | quote }} + - name: REDIS_URL + value: {{ include "dograh.redisUrl" . | quote }} + {{- if eq .Values.database.mode "internal" }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: password + {{- end }} + {{- if eq .Values.redis.mode "internal" }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-redisinternal + key: redis-password + {{- end }} + # exec probe — no HTTP endpoint exists on ari-manager. + livenessProbe: + {{- toYaml .Values.ariManager.livenessProbe | nindent 12 }} + resources: + {{- toYaml .Values.ariManager.resources | nindent 12 }} + {{- with .Values.ariManager.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.ariManager.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.ariManager.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/arq-worker-deployment.yaml b/deploy/helm/dograh/templates/arq-worker-deployment.yaml new file mode 100644 index 0000000..95fb6bb --- /dev/null +++ b/deploy/helm/dograh/templates/arq-worker-deployment.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.arqWorker.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: arq-worker +spec: + replicas: {{ .Values.workers.replicaCount }} + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: arq-worker + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: arq-worker + {{- with .Values.workers.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: arq-worker + image: {{ include "dograh.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./scripts/run_arq_worker.sh"] + envFrom: + {{- include "dograh.backendEnvFrom" . | nindent 12 }} + env: + - name: DATABASE_URL + value: {{ include "dograh.databaseUrl" . | quote }} + - name: REDIS_URL + value: {{ include "dograh.redisUrl" . | quote }} + {{- if eq .Values.database.mode "internal" }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: password + {{- end }} + {{- if eq .Values.redis.mode "internal" }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-redisinternal + key: redis-password + {{- end }} + livenessProbe: + {{- toYaml .Values.workers.livenessProbe | nindent 12 }} + resources: + {{- toYaml .Values.workers.resources | nindent 12 }} + volumeMounts: + {{- include "dograh.sharedTmpVolumeMounts" . | nindent 12 }} + volumes: + {{- include "dograh.sharedTmpVolumes" . | nindent 8 }} + {{- with .Values.workers.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.workers.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deploy/helm/dograh/templates/campaign-orchestrator-deployment.yaml b/deploy/helm/dograh/templates/campaign-orchestrator-deployment.yaml new file mode 100644 index 0000000..22a2a56 --- /dev/null +++ b/deploy/helm/dograh/templates/campaign-orchestrator-deployment.yaml @@ -0,0 +1,81 @@ +{{- if .Values.campaignOrchestrator.enabled }} +# SINGLETON — replicas hard-coded to 1, strategy: Recreate. +# campaign_orchestrator uses in-memory deduplication locks +# (`_processing_locks`); running >1 replica would silently break +# scheduling. Same singleton rules as ari-manager: no replica knob, +# Recreate strategy. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.campaignOrchestrator.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: campaign-orchestrator +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: campaign-orchestrator + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: campaign-orchestrator + {{- with .Values.campaignOrchestrator.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: campaign-orchestrator + image: {{ include "dograh.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./scripts/run_campaign_orchestrator.sh"] + envFrom: + {{- include "dograh.backendEnvFrom" . | nindent 12 }} + env: + - name: DATABASE_URL + value: {{ include "dograh.databaseUrl" . | quote }} + - name: REDIS_URL + value: {{ include "dograh.redisUrl" . | quote }} + {{- if eq .Values.database.mode "internal" }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: password + {{- end }} + {{- if eq .Values.redis.mode "internal" }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-redisinternal + key: redis-password + {{- end }} + # exec probe — no HTTP endpoint exists on campaign-orchestrator. + livenessProbe: + {{- toYaml .Values.campaignOrchestrator.livenessProbe | nindent 12 }} + resources: + {{- toYaml .Values.campaignOrchestrator.resources | nindent 12 }} + {{- with .Values.campaignOrchestrator.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.campaignOrchestrator.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.campaignOrchestrator.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/configmap.yaml b/deploy/helm/dograh/templates/configmap.yaml new file mode 100644 index 0000000..6502d61 --- /dev/null +++ b/deploy/helm/dograh/templates/configmap.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "dograh.configMapName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +data: + ENVIRONMENT: {{ .Values.config.environment | quote }} + LOG_LEVEL: {{ .Values.config.logLevel | quote }} + BACKEND_API_ENDPOINT: {{ .Values.config.backendApiEndpoint | quote }} + MINIO_BUCKET: {{ .Values.config.minioBucket | quote }} + MINIO_SECURE: {{ .Values.config.minioSecure | quote }} + ENABLE_AWS_S3: {{ ternary "true" "false" (eq .Values.storage.mode "s3") | quote }} + ENABLE_TELEMETRY: {{ .Values.config.enableTelemetry | quote }} + POSTHOG_HOST: {{ .Values.config.posthogHost | quote }} + POSTHOG_API_KEY: {{ .Values.config.posthogApiKey | quote }} + FORCE_TURN_RELAY: {{ .Values.config.forceTurnRelay | quote }} + TURN_HOST: {{ .Values.config.turnHost | quote }} + FASTAPI_WORKERS: {{ .Values.config.fastapiWorkers | quote }} + {{- /* MinIO endpoints derived from storage mode. */ -}} + {{- if eq .Values.storage.mode "internalMinio" }} + MINIO_ENDPOINT: {{ printf "%s:9000" (include "dograh.minioHost" .) | quote }} + MINIO_PUBLIC_ENDPOINT: {{ .Values.config.minioPublicEndpoint | quote }} + {{- else if eq .Values.storage.mode "externalMinio" }} + MINIO_ENDPOINT: {{ .Values.storage.externalMinio.endpoint | quote }} + MINIO_PUBLIC_ENDPOINT: {{ .Values.storage.externalMinio.publicEndpoint | quote }} + {{- else if eq .Values.storage.mode "s3" }} + AWS_REGION: {{ .Values.storage.s3.region | quote }} + MINIO_PUBLIC_ENDPOINT: {{ .Values.storage.s3.publicEndpoint | quote }} + {{- end }} + {{- /* TURN external IP visible to the web service for credential issuance. */ -}} + {{- if .Values.coturn.enabled }} + TURN_EXTERNAL_IP: {{ .Values.coturn.externalIp | quote }} + {{- end }} + {{- if .Values.secrets.langfuseHost }} + LANGFUSE_HOST: {{ .Values.secrets.langfuseHost | quote }} + {{- end }} diff --git a/deploy/helm/dograh/templates/coturn-configmap.yaml b/deploy/helm/dograh/templates/coturn-configmap.yaml new file mode 100644 index 0000000..9148d4a --- /dev/null +++ b/deploy/helm/dograh/templates/coturn-configmap.yaml @@ -0,0 +1,37 @@ +{{- if .Values.coturn.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "dograh.coturn.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: coturn +data: + turnserver.conf: | + # Auto-generated by the Dograh Helm chart. + listening-port={{ .Values.coturn.ports.plain }} + tls-listening-port={{ .Values.coturn.ports.tls }} + + min-port={{ .Values.coturn.relayPortRange.min }} + max-port={{ .Values.coturn.relayPortRange.max }} + + {{- if .Values.coturn.externalIp }} + external-ip={{ .Values.coturn.externalIp }} + {{- else }} + # external-ip not yet set. Run: + # helm upgrade {{ .Release.Name }} . --reuse-values --set coturn.externalIp= + # once the LoadBalancer Service has an address. + {{- end }} + + realm={{ .Values.coturn.realm }} + + use-auth-secret + static-auth-secret=$(TURN_SECRET) + + fingerprint + no-cli + no-multicast-peers + + log-file=stdout +{{- end }} diff --git a/deploy/helm/dograh/templates/coturn-deployment.yaml b/deploy/helm/dograh/templates/coturn-deployment.yaml new file mode 100644 index 0000000..16d958c --- /dev/null +++ b/deploy/helm/dograh/templates/coturn-deployment.yaml @@ -0,0 +1,98 @@ +{{- if .Values.coturn.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.coturn.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: coturn +spec: + # coturn is a singleton (per-LB instance). HA TURN requires a separate + # design (multiple LBs or anycast). + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: coturn + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: coturn + {{- with .Values.coturn.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + # Re-roll coturn when turnserver.conf changes. + checksum/config: {{ include (print $.Template.BasePath "/coturn-configmap.yaml") . | sha256sum }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: coturn + image: {{ include "dograh.coturn.image" . }} + imagePullPolicy: {{ .Values.coturn.image.pullPolicy }} + # coturn doesn't expand env vars in its config file; we render + # via envsubst at start so static-auth-secret can come from a + # Kubernetes Secret instead of being baked into the ConfigMap. + command: + - sh + - -c + - | + set -e + apk add --no-cache gettext >/dev/null 2>&1 || true + envsubst < /etc/coturn-template/turnserver.conf > /tmp/turnserver.conf + exec turnserver -c /tmp/turnserver.conf + env: + - name: TURN_SECRET + valueFrom: + secretKeyRef: + name: {{ include "dograh.secretName" . }} + key: TURN_SECRET + ports: + - name: turn-udp + containerPort: {{ .Values.coturn.ports.plain }} + protocol: UDP + - name: turn-tcp + containerPort: {{ .Values.coturn.ports.plain }} + protocol: TCP + - name: turns-udp + containerPort: {{ .Values.coturn.ports.tls }} + protocol: UDP + - name: turns-tcp + containerPort: {{ .Values.coturn.ports.tls }} + protocol: TCP + resources: + {{- toYaml .Values.coturn.resources | nindent 12 }} + volumeMounts: + - name: config + mountPath: /etc/coturn-template + readOnly: true + - name: tmp + mountPath: /tmp + volumes: + - name: config + configMap: + name: {{ include "dograh.coturn.fullname" . }} + - name: tmp + emptyDir: {} + {{- with .Values.coturn.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.coturn.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.coturn.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/coturn-service.yaml b/deploy/helm/dograh/templates/coturn-service.yaml new file mode 100644 index 0000000..a503463 --- /dev/null +++ b/deploy/helm/dograh/templates/coturn-service.yaml @@ -0,0 +1,47 @@ +{{- if .Values.coturn.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "dograh.coturn.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: coturn + {{- with .Values.coturn.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.coturn.service.type }} + {{- if eq .Values.coturn.service.type "LoadBalancer" }} + externalTrafficPolicy: {{ .Values.coturn.service.externalTrafficPolicy }} + {{- end }} + ports: + - name: turn-udp + port: {{ .Values.coturn.ports.plain }} + targetPort: turn-udp + protocol: UDP + - name: turn-tcp + port: {{ .Values.coturn.ports.plain }} + targetPort: turn-tcp + protocol: TCP + - name: turns-udp + port: {{ .Values.coturn.ports.tls }} + targetPort: turns-udp + protocol: UDP + - name: turns-tcp + port: {{ .Values.coturn.ports.tls }} + targetPort: turns-tcp + protocol: TCP + # Relay range ports. AWS NLB has a default 50-listener cap; widening + # the range past that requires either a quota bump or multiple NLBs. + {{- range $port := untilStep (int .Values.coturn.relayPortRange.min) (int (add1 (int .Values.coturn.relayPortRange.max))) 1 }} + - name: relay-{{ $port }} + port: {{ $port }} + targetPort: {{ $port }} + protocol: UDP + {{- end }} + selector: + {{- include "dograh.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: coturn +{{- end }} diff --git a/deploy/helm/dograh/templates/gateway.yaml b/deploy/helm/dograh/templates/gateway.yaml new file mode 100644 index 0000000..24a6b62 --- /dev/null +++ b/deploy/helm/dograh/templates/gateway.yaml @@ -0,0 +1,34 @@ +{{- if and (eq .Values.exposure.mode "gatewayApi") .Values.exposure.gatewayApi.createGateway }} +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: {{ include "dograh.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +spec: + gatewayClassName: {{ required "exposure.gatewayApi.gatewayClassName is required when createGateway=true" .Values.exposure.gatewayApi.gatewayClassName }} + listeners: + - name: http + port: 80 + protocol: HTTP + {{- if .Values.exposure.gatewayApi.listenerHostname }} + hostname: {{ .Values.exposure.gatewayApi.listenerHostname | quote }} + {{- end }} + allowedRoutes: + namespaces: + from: Same + - name: https + port: 443 + protocol: HTTPS + {{- if .Values.exposure.gatewayApi.listenerHostname }} + hostname: {{ .Values.exposure.gatewayApi.listenerHostname | quote }} + {{- end }} + tls: + mode: Terminate + certificateRefs: + - name: {{ default (printf "%s-tls" (include "dograh.fullname" .)) .Values.exposure.ingress.tls.secretName }} + allowedRoutes: + namespaces: + from: Same +{{- end }} diff --git a/deploy/helm/dograh/templates/httproute-api.yaml b/deploy/helm/dograh/templates/httproute-api.yaml new file mode 100644 index 0000000..8e8a292 --- /dev/null +++ b/deploy/helm/dograh/templates/httproute-api.yaml @@ -0,0 +1,33 @@ +{{- if eq .Values.exposure.mode "gatewayApi" }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "dograh.fullname" . }}-api + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +spec: + parentRefs: + {{- if .Values.exposure.gatewayApi.createGateway }} + - name: {{ include "dograh.fullname" . }} + {{- else }} + {{- range .Values.exposure.gatewayApi.parentRefs }} + - name: {{ .name }} + {{- if .namespace }} + namespace: {{ .namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.exposure.gatewayApi.listenerHostname }} + hostnames: + - {{ .Values.exposure.gatewayApi.listenerHostname | quote }} + {{- end }} + rules: + - matches: + - path: + type: PathPrefix + value: /api/ + backendRefs: + - name: {{ include "dograh.web.fullname" . }} + port: {{ .Values.web.service.port }} +{{- end }} diff --git a/deploy/helm/dograh/templates/httproute-minio.yaml b/deploy/helm/dograh/templates/httproute-minio.yaml new file mode 100644 index 0000000..5630fe2 --- /dev/null +++ b/deploy/helm/dograh/templates/httproute-minio.yaml @@ -0,0 +1,46 @@ +{{- /* +Browser-visible MinIO route. Mounted under the shared API/UI hostname at +/voice-audio/* to mirror the existing nginx pass-through. Operators who +want a dedicated hostname can edit this template. +*/ -}} +{{- if and (eq .Values.exposure.mode "gatewayApi") (or (eq .Values.storage.mode "internalMinio") (eq .Values.storage.mode "externalMinio")) }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "dograh.fullname" . }}-minio + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +spec: + parentRefs: + {{- if .Values.exposure.gatewayApi.createGateway }} + - name: {{ include "dograh.fullname" . }} + {{- else }} + {{- range .Values.exposure.gatewayApi.parentRefs }} + - name: {{ .name }} + {{- if .namespace }} + namespace: {{ .namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.exposure.gatewayApi.listenerHostname }} + hostnames: + - {{ .Values.exposure.gatewayApi.listenerHostname | quote }} + {{- end }} + rules: + - matches: + - path: + type: PathPrefix + value: /voice-audio/ + backendRefs: + {{- if eq .Values.storage.mode "internalMinio" }} + - name: {{ include "dograh.minioHost" . }} + port: 9000 + {{- else }} + # externalMinio: route to a placeholder Service named + # -minio-external. Operators must create this Service of + # type ExternalName pointing at storage.externalMinio.endpoint. + - name: {{ include "dograh.fullname" . }}-minio-external + port: 9000 + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/httproute-ui.yaml b/deploy/helm/dograh/templates/httproute-ui.yaml new file mode 100644 index 0000000..23b170d --- /dev/null +++ b/deploy/helm/dograh/templates/httproute-ui.yaml @@ -0,0 +1,33 @@ +{{- if and (eq .Values.exposure.mode "gatewayApi") .Values.ui.enabled }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "dograh.fullname" . }}-ui + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +spec: + parentRefs: + {{- if .Values.exposure.gatewayApi.createGateway }} + - name: {{ include "dograh.fullname" . }} + {{- else }} + {{- range .Values.exposure.gatewayApi.parentRefs }} + - name: {{ .name }} + {{- if .namespace }} + namespace: {{ .namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.exposure.gatewayApi.listenerHostname }} + hostnames: + - {{ .Values.exposure.gatewayApi.listenerHostname | quote }} + {{- end }} + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: {{ include "dograh.ui.fullname" . }} + port: {{ .Values.ui.service.port }} +{{- end }} diff --git a/deploy/helm/dograh/templates/ingress.yaml b/deploy/helm/dograh/templates/ingress.yaml new file mode 100644 index 0000000..e06850c --- /dev/null +++ b/deploy/helm/dograh/templates/ingress.yaml @@ -0,0 +1,66 @@ +{{- if eq .Values.exposure.mode "ingress" }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "dograh.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + annotations: + # Sensible defaults for WebSocket-heavy signaling traffic. These are + # nginx-ingress style; if you use a different controller, override + # via exposure.ingress.annotations. + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + {{- with .Values.exposure.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.exposure.ingress.className }} + ingressClassName: {{ .Values.exposure.ingress.className | quote }} + {{- end }} + {{- if .Values.exposure.ingress.tls.enabled }} + tls: + - hosts: + - {{ required "exposure.ingress.host is required when tls.enabled=true" .Values.exposure.ingress.host | quote }} + secretName: {{ required "exposure.ingress.tls.secretName is required when tls.enabled=true" .Values.exposure.ingress.tls.secretName | quote }} + {{- end }} + rules: + - {{- if .Values.exposure.ingress.host }} + host: {{ .Values.exposure.ingress.host | quote }} + {{- end }} + http: + paths: + - path: /api/ + pathType: Prefix + backend: + service: + name: {{ include "dograh.web.fullname" . }} + port: + number: {{ .Values.web.service.port }} + {{- if or (eq .Values.storage.mode "internalMinio") (eq .Values.storage.mode "externalMinio") }} + - path: /voice-audio/ + pathType: Prefix + backend: + service: + {{- if eq .Values.storage.mode "internalMinio" }} + name: {{ include "dograh.minioHost" . }} + {{- else }} + # externalMinio: requires an ExternalName Service named + # -minio-external pointing at storage.externalMinio.endpoint. + name: {{ include "dograh.fullname" . }}-minio-external + {{- end }} + port: + number: 9000 + {{- end }} + {{- if .Values.ui.enabled }} + - path: / + pathType: Prefix + backend: + service: + name: {{ include "dograh.ui.fullname" . }} + port: + number: {{ .Values.ui.service.port }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/migrate-job.yaml b/deploy/helm/dograh/templates/migrate-job.yaml new file mode 100644 index 0000000..20b6c82 --- /dev/null +++ b/deploy/helm/dograh/templates/migrate-job.yaml @@ -0,0 +1,49 @@ +{{- if .Values.migrate.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "dograh.migrate.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: migrate + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + activeDeadlineSeconds: {{ .Values.migrate.activeDeadlineSeconds }} + ttlSecondsAfterFinished: 86400 + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: migrate + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + restartPolicy: Never + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: migrate + image: {{ include "dograh.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./scripts/run_migrate.sh"] + envFrom: + {{- include "dograh.backendEnvFrom" . | nindent 12 }} + env: + - name: DATABASE_URL + value: {{ include "dograh.databaseUrl" . | quote }} + {{- if eq .Values.database.mode "internal" }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: password + {{- end }} + resources: + {{- toYaml .Values.migrate.resources | nindent 12 }} +{{- end }} diff --git a/deploy/helm/dograh/templates/secret.yaml b/deploy/helm/dograh/templates/secret.yaml new file mode 100644 index 0000000..5af798c --- /dev/null +++ b/deploy/helm/dograh/templates/secret.yaml @@ -0,0 +1,33 @@ +{{- if not .Values.secrets.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "dograh.secretName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +type: Opaque +stringData: + OSS_JWT_SECRET: {{ required "secrets.ossJwtSecret is required" .Values.secrets.ossJwtSecret | quote }} + TURN_SECRET: {{ .Values.secrets.turnSecret | quote }} + {{- if eq .Values.database.mode "external" }} + DATABASE_URL: {{ required "secrets.databaseUrl is required when database.mode=external" .Values.secrets.databaseUrl | quote }} + {{- end }} + {{- if eq .Values.redis.mode "external" }} + REDIS_URL: {{ required "secrets.redisUrl is required when redis.mode=external" .Values.secrets.redisUrl | quote }} + {{- end }} + {{- if or (eq .Values.storage.mode "internalMinio") (eq .Values.storage.mode "externalMinio") }} + MINIO_ACCESS_KEY: {{ .Values.secrets.minioAccessKey | quote }} + MINIO_SECRET_KEY: {{ .Values.secrets.minioSecretKey | quote }} + {{- end }} + {{- if eq .Values.storage.mode "s3" }} + {{- if .Values.secrets.awsAccessKeyId }} + AWS_ACCESS_KEY_ID: {{ .Values.secrets.awsAccessKeyId | quote }} + AWS_SECRET_ACCESS_KEY: {{ .Values.secrets.awsSecretAccessKey | quote }} + {{- end }} + {{- end }} + {{- if .Values.secrets.langfuseSecretKey }} + LANGFUSE_SECRET_KEY: {{ .Values.secrets.langfuseSecretKey | quote }} + LANGFUSE_PUBLIC_KEY: {{ .Values.secrets.langfusePublicKey | quote }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/serviceaccount.yaml b/deploy/helm/dograh/templates/serviceaccount.yaml new file mode 100644 index 0000000..7331cad --- /dev/null +++ b/deploy/helm/dograh/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "dograh.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/shared-tmp-pvc.yaml b/deploy/helm/dograh/templates/shared-tmp-pvc.yaml new file mode 100644 index 0000000..5a3546f --- /dev/null +++ b/deploy/helm/dograh/templates/shared-tmp-pvc.yaml @@ -0,0 +1,18 @@ +{{- if .Values.sharedTmp.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "dograh.fullname" . }}-shared-tmp + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} +spec: + accessModes: + - ReadWriteMany + {{- if .Values.sharedTmp.storageClassName }} + storageClassName: {{ .Values.sharedTmp.storageClassName | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.sharedTmp.size }} +{{- end }} diff --git a/deploy/helm/dograh/templates/ui-deployment.yaml b/deploy/helm/dograh/templates/ui-deployment.yaml new file mode 100644 index 0000000..9285fc3 --- /dev/null +++ b/deploy/helm/dograh/templates/ui-deployment.yaml @@ -0,0 +1,73 @@ +{{- if .Values.ui.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.ui.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: ui +spec: + replicas: {{ .Values.ui.replicaCount }} + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ui + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ui + {{- with .Values.ui.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: ui + image: {{ include "dograh.ui.image" . }} + imagePullPolicy: {{ .Values.ui.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.ui.port }} + protocol: TCP + env: + - name: NODE_ENV + value: "oss" + - name: BACKEND_URL + value: {{ default (printf "http://%s:%d" (include "dograh.web.fullname" .) (int .Values.web.service.port)) .Values.ui.backendUrl | quote }} + - name: ENABLE_TELEMETRY + value: {{ .Values.config.enableTelemetry | quote }} + - name: POSTHOG_KEY + value: {{ .Values.config.posthogApiKey | quote }} + - name: POSTHOG_HOST + value: {{ .Values.config.posthogHost | quote }} + livenessProbe: + {{- toYaml .Values.ui.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.ui.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.ui.resources | nindent 12 }} + {{- with .Values.ui.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.ui.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.ui.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/ui-pdb.yaml b/deploy/helm/dograh/templates/ui-pdb.yaml new file mode 100644 index 0000000..6d1b74d --- /dev/null +++ b/deploy/helm/dograh/templates/ui-pdb.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.ui.enabled .Values.ui.pdb.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "dograh.ui.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: ui +spec: + minAvailable: {{ .Values.ui.pdb.minAvailable }} + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ui +{{- end }} diff --git a/deploy/helm/dograh/templates/ui-service.yaml b/deploy/helm/dograh/templates/ui-service.yaml new file mode 100644 index 0000000..6e807f7 --- /dev/null +++ b/deploy/helm/dograh/templates/ui-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.ui.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "dograh.ui.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: ui +spec: + type: {{ .Values.ui.service.type }} + ports: + - name: http + port: {{ .Values.ui.service.port }} + targetPort: http + protocol: TCP + selector: + {{- include "dograh.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: ui +{{- end }} diff --git a/deploy/helm/dograh/templates/web-deployment.yaml b/deploy/helm/dograh/templates/web-deployment.yaml new file mode 100644 index 0000000..6fdb585 --- /dev/null +++ b/deploy/helm/dograh/templates/web-deployment.yaml @@ -0,0 +1,107 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "dograh.web.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: web +spec: + {{- if not .Values.autoscaling.web.enabled }} + replicas: {{ .Values.web.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: web + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + {{- include "dograh.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: web + {{- with .Values.web.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "dograh.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + # Long-lived signaling WebSockets keep state in-process; honor the + # configured drain window so in-flight calls survive a rolling + # update. See README "Decisions log". + terminationGracePeriodSeconds: {{ .Values.web.terminationGracePeriodSeconds }} + containers: + - name: web + image: {{ include "dograh.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["./scripts/run_web.sh"] + ports: + - name: http + containerPort: {{ .Values.web.port }} + protocol: TCP + envFrom: + {{- include "dograh.backendEnvFrom" . | nindent 12 }} + env: + - name: WEB_PORT + value: {{ .Values.web.port | quote }} + - name: DATABASE_URL + value: {{ include "dograh.databaseUrl" . | quote }} + - name: REDIS_URL + value: {{ include "dograh.redisUrl" . | quote }} + {{- if eq .Values.database.mode "internal" }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: password + {{- end }} + {{- if eq .Values.redis.mode "internal" }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-redisinternal + key: redis-password + {{- end }} + # Distinct probes: readiness flips fast (drain), liveness is + # slower (process aliveness). + livenessProbe: + {{- toYaml .Values.web.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.web.readinessProbe | nindent 12 }} + lifecycle: + preStop: + # Sleep so the gateway / load balancer observes the pod + # NotReady and stops sending new connections before SIGTERM + # propagates to uvicorn. + exec: + command: ["sh", "-c", "sleep {{ .Values.web.preStopSleepSeconds }}"] + resources: + {{- toYaml .Values.web.resources | nindent 12 }} + volumeMounts: + {{- include "dograh.sharedTmpVolumeMounts" . | nindent 12 }} + volumes: + {{- include "dograh.sharedTmpVolumes" . | nindent 8 }} + {{- with .Values.web.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.web.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.web.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.web.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deploy/helm/dograh/templates/web-hpa.yaml b/deploy/helm/dograh/templates/web-hpa.yaml new file mode 100644 index 0000000..cdc0486 --- /dev/null +++ b/deploy/helm/dograh/templates/web-hpa.yaml @@ -0,0 +1,38 @@ +{{- if .Values.autoscaling.web.enabled }} +# WARNING: CPU/memory is a poor signal for WebRTC signaling. WebSockets +# are long-lived and low-CPU; this HPA will not respond to connection +# pressure. Replace with a custom metric (active connections, active +# calls) as soon as one is exposed. +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "dograh.web.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: web +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "dograh.web.fullname" . }} + minReplicas: {{ .Values.autoscaling.web.minReplicas }} + maxReplicas: {{ .Values.autoscaling.web.maxReplicas }} + metrics: + {{- if .Values.autoscaling.web.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.web.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.web.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.web.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/deploy/helm/dograh/templates/web-pdb.yaml b/deploy/helm/dograh/templates/web-pdb.yaml new file mode 100644 index 0000000..39d74b1 --- /dev/null +++ b/deploy/helm/dograh/templates/web-pdb.yaml @@ -0,0 +1,16 @@ +{{- if .Values.web.pdb.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "dograh.web.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: web +spec: + minAvailable: {{ .Values.web.pdb.minAvailable }} + selector: + matchLabels: + {{- include "dograh.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: web +{{- end }} diff --git a/deploy/helm/dograh/templates/web-service.yaml b/deploy/helm/dograh/templates/web-service.yaml new file mode 100644 index 0000000..159d96b --- /dev/null +++ b/deploy/helm/dograh/templates/web-service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "dograh.web.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "dograh.labels" . | nindent 4 }} + app.kubernetes.io/component: web + {{- with .Values.web.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.web.service.type }} + ports: + - name: http + port: {{ .Values.web.service.port }} + targetPort: http + protocol: TCP + selector: + {{- include "dograh.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: web diff --git a/deploy/helm/dograh/values.schema.json b/deploy/helm/dograh/values.schema.json new file mode 100644 index 0000000..e2b746c --- /dev/null +++ b/deploy/helm/dograh/values.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Dograh Helm chart values", + "type": "object", + "properties": { + "database": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["internal", "external"] + } + }, + "required": ["mode"] + }, + "redis": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["internal", "external"] + } + }, + "required": ["mode"] + }, + "storage": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["internalMinio", "externalMinio", "s3"] + } + }, + "required": ["mode"] + }, + "exposure": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["gatewayApi", "ingress"] + } + }, + "required": ["mode"] + } + }, + "required": ["database", "redis", "storage", "exposure"] +} diff --git a/deploy/helm/dograh/values.yaml b/deploy/helm/dograh/values.yaml new file mode 100644 index 0000000..5c53d13 --- /dev/null +++ b/deploy/helm/dograh/values.yaml @@ -0,0 +1,527 @@ +# Dograh Helm chart — default values. +# +# Conventions: +# - "mode" fields are enums; see values.schema.json for allowed values. +# - Anything sensitive (passwords, tokens, signing keys) is split into the +# `secrets:` section and rendered as a Kubernetes Secret. Non-sensitive +# config lives in `config:` and renders as a ConfigMap. +# - The chart never ships real defaults for credentials. Operators must +# override `secrets.*` (or supply an existing Secret name). + +# ----------------------------------------------------------------------------- +# Global image config — applied to web, workers, ariManager, campaignOrchestrator +# ----------------------------------------------------------------------------- +image: + registry: docker.io + repository: dograhai/dograh-api + tag: latest + pullPolicy: IfNotPresent + +imagePullSecrets: [] +# - name: regcred + +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + create: true + name: "" + annotations: {} + +# ----------------------------------------------------------------------------- +# Stateful dependency modes. +# +# database.mode: +# internal — bundled Bitnami PostgreSQL subchart (postgresql.enabled=true) +# external — operator supplies DATABASE_URL via secrets.databaseUrl +# redis.mode: +# internal — bundled Bitnami Redis subchart (redis.enabled=true) +# external — operator supplies REDIS_URL via secrets.redisUrl +# storage.mode: +# internalMinio — bundled MinIO subchart (minio.enabled=true) +# externalMinio — operator supplies a MinIO-compatible endpoint + creds +# s3 — sets ENABLE_AWS_S3=true; uses AWS S3 +# exposure.mode: +# gatewayApi — renders Gateway + HTTPRoute (gateway.networking.k8s.io/v1) +# ingress — renders Ingress resources (networking.k8s.io/v1) +# ----------------------------------------------------------------------------- +database: + mode: internal + # For external mode, secrets.databaseUrl must be set. + +redis: + mode: internal + # For external mode, secrets.redisUrl must be set. + +storage: + mode: internalMinio + # For externalMinio mode, set externalMinio.endpoint + secrets.minioAccessKey + # + secrets.minioSecretKey. + externalMinio: + endpoint: "" # e.g. minio.example.com + publicEndpoint: "" # browser-visible URL + secure: false + bucket: voice-audio + # For s3 mode, set s3.region. AWS credentials are picked up from the pod's + # IAM role (IRSA recommended) or from secrets.awsAccessKeyId + secrets.awsSecretAccessKey. + s3: + region: us-east-1 + bucket: voice-audio + publicEndpoint: "" # e.g. https://s3.amazonaws.com + +exposure: + # Default is `ingress` because it works out-of-the-box on any cluster + # without requiring Gateway API CRDs. Production deployments should + # prefer `gatewayApi` per HELM_DEPLOYMENT_PLAN.md — switch the mode + # and supply gatewayClassName. + mode: ingress + # Gateway API config (when mode=gatewayApi). + gatewayApi: + # Set to false to skip rendering the Gateway resource and instead + # attach HTTPRoutes to a pre-existing Gateway (parentRef.name below). + createGateway: true + gatewayClassName: "" # required when createGateway=true (e.g. "istio", "envoy-gateway", "aws-alb") + listenerHostname: "" # optional SNI hostname for the listener; empty = wildcard + # Reference an existing Gateway instead of creating one. + # Ignored when createGateway=true. + parentRefs: + - name: dograh + namespace: "" # empty = same namespace as the release + # Ingress config (when mode=ingress). + ingress: + className: "" # e.g. "nginx", "alb" + annotations: {} + # Hostname for the API/UI. UI is served at / and API under /api/. + # MinIO browser-visible path uses the same hostname under /voice-audio/. + host: "" # e.g. dograh.example.com + tls: + enabled: false + secretName: "" # operator-managed TLS secret in the release namespace + +# ----------------------------------------------------------------------------- +# Non-sensitive runtime config — rendered into a ConfigMap and injected via +# envFrom on every backend pod. Sensitive values live under `secrets:` below. +# ----------------------------------------------------------------------------- +config: + environment: production + logLevel: INFO + backendApiEndpoint: "" # public URL the browser uses to reach the API; auto-derived from exposure.host if empty in NOTES + minioBucket: voice-audio + minioEndpoint: "" # internal cluster endpoint (auto-set when internalMinio) + minioPublicEndpoint: "" # browser-visible endpoint (auto-set when ingress/gateway path exposes MinIO) + minioSecure: false + enableAwsS3: false + enableTelemetry: true + posthogHost: https://us.i.posthog.com + posthogApiKey: phc_ItizB1dP6yv7ZYobbcqrpxTdbomDA8hJFSEmAMdYvIr + forceTurnRelay: false + turnHost: "" # public hostname/IP of coturn (the LoadBalancer address) + fastapiWorkers: 1 # informational only; web tier scales by pod, not in-pod workers + +# ----------------------------------------------------------------------------- +# Secrets — rendered into a Kubernetes Secret unless secrets.existingSecret is +# set. NEVER commit real values here; override via -f overrides.yaml or +# --set-string at install time. +# ----------------------------------------------------------------------------- +secrets: + # If set, the chart skips rendering its own Secret and assumes this Secret + # already exists in the release namespace with all keys below. + existingSecret: "" + + # Required when database.mode=external. + databaseUrl: "" # e.g. postgresql+asyncpg://user:pass@host:5432/dograh + # Required when redis.mode=external. + redisUrl: "" # e.g. redis://:pass@host:6379 + + # MinIO / S3 credentials. + minioAccessKey: "" + minioSecretKey: "" + awsAccessKeyId: "" # only used when storage.mode=s3 and not using IRSA + awsSecretAccessKey: "" + + # JWT signing key for the OSS auth path. MUST be overridden in production. + ossJwtSecret: "ChangeMeInProduction" + + # TURN REST API shared secret (matches coturn.staticAuthSecret below). + turnSecret: "" + + # Optional Langfuse tracing. + langfuseSecretKey: "" + langfusePublicKey: "" + langfuseHost: "" + +# ----------------------------------------------------------------------------- +# Shared /tmp PVC. +# +# AUDIT FINDING: api/services/pipecat/event_handlers.py writes WAV/transcript +# tempfiles in the web process and enqueues an ARQ job that reads those exact +# paths in the worker (api/tasks/s3_upload.py). In compose this works because +# all processes share the `shared-tmp` volume. In Kubernetes web and worker run +# in separate pods. Options: +# 1. Enable this PVC (ReadWriteMany required) to mount /tmp/dograh-shared +# into both web and arq-worker pods. Use this for v1. +# 2. Refactor event_handlers.py to upload from the web process and pass a +# storage key (not a local path) to the ARQ job. Preferred long-term; +# see deploy/helm/dograh/README.md "Open TODOs". +# If your cluster lacks RWX (most cloud default storage classes are RWO), +# you MUST take option (2) before splitting web and worker pods, or end-of- +# call uploads will fail silently. +sharedTmp: + enabled: false + storageClassName: "" # must be an RWX-capable class (e.g. efs-sc, azurefile, longhorn-rwx) + size: 10Gi + mountPath: /tmp/dograh-shared + +# ----------------------------------------------------------------------------- +# Web tier (FastAPI + WebSocket signaling) +# ----------------------------------------------------------------------------- +web: + replicaCount: 2 + port: 8000 + + # Long-lived signaling WebSockets keep per-connection state in process + # memory (api/routes/webrtc_signaling.py). A naive pod restart drops every + # in-flight call. The two settings below give the gateway time to stop + # routing new connections to a terminating pod and give in-flight calls + # time to finish. + terminationGracePeriodSeconds: 600 + # preStop sleep: long enough for the load balancer to observe the pod going + # NotReady and stop sending new connections. 15s is conservative for most + # controllers (gateway/nginx/ALB). + preStopSleepSeconds: 15 + + resources: + # These are conservative starting numbers. Tune to your workload — + # WebRTC signaling is mostly idle but bursty during call setup. + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "2" + memory: 2Gi + + # Distinct probes so the pod can fail readiness during drain without being + # killed for liveness. liveness has a longer threshold (process is alive) + # while readiness flips quickly (stop receiving new connections). + livenessProbe: + httpGet: + path: /api/v1/health + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + httpGet: + path: /api/v1/health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 2 + + service: + type: ClusterIP + port: 8000 + annotations: {} + + pdb: + enabled: true + minAvailable: 1 + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + # Recommend spreading web pods across zones / nodes. + topologySpreadConstraints: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# ARQ background workers +# ----------------------------------------------------------------------------- +workers: + replicaCount: 1 + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: "1" + memory: 1Gi + + # exec probe — workers have no HTTP endpoint. + livenessProbe: + exec: + command: ["sh", "-c", "pgrep -f 'arq api.tasks.arq.WorkerSettings' > /dev/null"] + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# ARI manager — TELEPHONY SINGLETON +# +# Maintains an outbound WebSocket to Asterisk and is the canonical receiver of +# ARI events. Running >1 replica produces duplicate event handling. The chart +# hard-codes replicas:1 and strategy:Recreate; there is NO replica knob here +# on purpose. Add proper leader election before relaxing this. +# ----------------------------------------------------------------------------- +ariManager: + enabled: true + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + livenessProbe: + exec: + command: ["sh", "-c", "pgrep -f api.services.telephony.ari_manager > /dev/null"] + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# Campaign orchestrator — CAMPAIGN SINGLETON +# +# Uses in-memory deduplication locks (api/services/campaign/campaign_orchestrator.py +# `_processing_locks`). Running >1 replica would silently break scheduling. +# Same singleton rules as ariManager: no replica knob, Recreate strategy. +# ----------------------------------------------------------------------------- +campaignOrchestrator: + enabled: true + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + livenessProbe: + exec: + command: ["sh", "-c", "pgrep -f api.services.campaign.campaign_orchestrator > /dev/null"] + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# Next.js UI +# ----------------------------------------------------------------------------- +ui: + enabled: true + replicaCount: 2 + + image: + registry: docker.io + repository: dograhai/dograh-ui + tag: latest + pullPolicy: IfNotPresent + + port: 3010 + + # Server-side (SSR) URL. Defaults to the in-cluster web Service. + backendUrl: "" # auto-set in template when empty + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + livenessProbe: + httpGet: + path: / + port: 3010 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: / + port: 3010 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 2 + + service: + type: ClusterIP + port: 3010 + + pdb: + enabled: true + minAvailable: 1 + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# coturn — TURN media relay +# ----------------------------------------------------------------------------- +coturn: + enabled: true + + image: + registry: docker.io + repository: coturn/coturn + tag: "4.8.0" + pullPolicy: IfNotPresent + + # External IP advertised by coturn for NAT traversal. This is the + # LoadBalancer IP of the coturn Service. There is a chicken-and-egg here: + # the LB IP may not be known until after install. See NOTES.txt for the + # supported workflow (install with placeholder, kubectl get svc, helm + # upgrade --set coturn.externalIp=). + externalIp: "" + + realm: dograh.com + + # Coturn uses TURN REST API authentication (HMAC-SHA1). The secret here + # MUST match secrets.turnSecret — the chart will warn at install time if + # they diverge. + staticAuthSecretFromSecretsKey: turnSecret + + # Relay port range. AWS NLB has a default quota of 50 listeners per LB, + # so the default 49 ports (49152-49200) sits just inside the limit. + # Increasing this requires either a higher NLB listener quota or + # additional TURN deployments. + relayPortRange: + min: 49152 + max: 49200 + + # Standard TURN ports. + ports: + plain: 3478 + tls: 5349 + + # TLS for turns:// — NOT WIRED IN v1. The original docker-compose exposes + # 5349 but does not configure cert paths. v1 scopes to plain TURN over + # UDP/TCP. See README.md "Open TODOs". + tls: + enabled: false + + service: + type: LoadBalancer + annotations: {} + # externalTrafficPolicy: Local preserves the client IP, which TURN auth + # benefits from. Some LBs need this set to "Cluster" to be reachable. + externalTrafficPolicy: Local + + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: "2" + memory: 1Gi + + podAnnotations: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ----------------------------------------------------------------------------- +# Migration Job +# ----------------------------------------------------------------------------- +migrate: + # Run alembic upgrade head as a pre-install / pre-upgrade Helm hook. + enabled: true + + # Hard cap on how long a migration may run. A failed/exceeded migration + # rolls back the install/upgrade because backoffLimit is 0. + activeDeadlineSeconds: 600 + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + +# ----------------------------------------------------------------------------- +# Horizontal Pod Autoscaling — web tier only. +# +# WARNING: CPU/memory is a poor signal for WebRTC signaling workloads. +# WebSockets are long-lived, low-CPU, and steady-memory; CPU will look flat +# while you saturate per-pod connection limits. Replace this with a custom +# metric (active WS connections, active calls) once one is exposed. +# ----------------------------------------------------------------------------- +autoscaling: + web: + enabled: false + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + +# ----------------------------------------------------------------------------- +# Bitnami subcharts. Each is gated by its own `enabled` flag set by mode +# resolution in templates/_helpers.tpl. Override the subchart's own values +# under these keys. +# ----------------------------------------------------------------------------- +postgresql: + # enabled is set automatically based on database.mode in _helpers.tpl; + # this key is read by the subchart's `condition`. + enabled: true + auth: + username: dograh + password: "" # auto-generated if empty + database: dograh + primary: + persistence: + enabled: true + size: 8Gi + +redisInternal: + # Bitnami Redis subchart values, aliased to redisInternal to avoid + # colliding with `redis.mode` above. `redisInternal.enabled` is the gating + # flag for whether the subchart deploys. + enabled: true + auth: + enabled: true + password: "" # auto-generated if empty + master: + persistence: + enabled: true + size: 8Gi + replica: + replicaCount: 0 # standalone primary by default + +minio: + enabled: true + auth: + rootUser: minioadmin + rootPassword: "" # auto-generated if empty + defaultBuckets: "voice-audio" + persistence: + enabled: true + size: 20Gi diff --git a/scripts/run_ari_manager.sh b/scripts/run_ari_manager.sh new file mode 100755 index 0000000..95b459a --- /dev/null +++ b/scripts/run_ari_manager.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_DIR="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)" +ENV_FILE="$BASE_DIR/api/.env" + +if [[ -f "$ENV_FILE" ]]; then + set -a && . "$ENV_FILE" && set +a +fi + +cd "$BASE_DIR" +exec python -m api.services.telephony.ari_manager diff --git a/scripts/run_arq_worker.sh b/scripts/run_arq_worker.sh new file mode 100755 index 0000000..abe0e2b --- /dev/null +++ b/scripts/run_arq_worker.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_DIR="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)" +ENV_FILE="$BASE_DIR/api/.env" + +if [[ -f "$ENV_FILE" ]]; then + set -a && . "$ENV_FILE" && set +a +fi + +cd "$BASE_DIR" +exec python -m arq api.tasks.arq.WorkerSettings --custom-log-dict api.tasks.arq.LOG_CONFIG diff --git a/scripts/run_campaign_orchestrator.sh b/scripts/run_campaign_orchestrator.sh new file mode 100755 index 0000000..a8ed3b3 --- /dev/null +++ b/scripts/run_campaign_orchestrator.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_DIR="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)" +ENV_FILE="$BASE_DIR/api/.env" + +if [[ -f "$ENV_FILE" ]]; then + set -a && . "$ENV_FILE" && set +a +fi + +cd "$BASE_DIR" +exec python -m api.services.campaign.campaign_orchestrator diff --git a/scripts/run_migrate.sh b/scripts/run_migrate.sh new file mode 100755 index 0000000..904b22f --- /dev/null +++ b/scripts/run_migrate.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_DIR="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)" +ENV_FILE="$BASE_DIR/api/.env" + +if [[ -f "$ENV_FILE" ]]; then + set -a && . "$ENV_FILE" && set +a +fi + +cd "$BASE_DIR" +exec alembic -c "$BASE_DIR/api/alembic.ini" upgrade head diff --git a/scripts/run_web.sh b/scripts/run_web.sh new file mode 100755 index 0000000..913eb73 --- /dev/null +++ b/scripts/run_web.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_DIR="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)" +ENV_FILE="$BASE_DIR/api/.env" + +if [[ -f "$ENV_FILE" ]]; then + set -a && . "$ENV_FILE" && set +a +fi + +PORT="${WEB_PORT:-8000}" + +cd "$BASE_DIR" +exec uvicorn api.app:app --host 0.0.0.0 --port "$PORT" --workers 1