Merge branch 'main' into helm-deployment

2026-07-01 08:59:46 +02:00 · 2026-06-30 18:14:32 +05:30 · 2026-06-30 18:14:32 +05:30 · 89d1e5ee89
commit 89d1e5ee89
parent 0d59ae776c a616b7ff98
523 changed files with 37767 additions and 11930 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@ -0,0 +1,102 @@
+# =============================================================================
+# Stage 1: venv-builder
+# Minimal image whose only job is to populate the venv. Uses the same Python
+# source as the runtime stage (deadsnakes) so the symlinks inside the venv
+# (e.g. venv/bin/python -> /usr/bin/python3.13) stay valid after COPY --from.
+# Everything in this stage except the venv itself is discarded.
+# =============================================================================
+FROM ubuntu:24.04 AS venv-builder
+
+# System packages needed to populate the venv, plus Python 3.13 from the
+# deadsnakes PPA (add-apt-repository comes from software-properties-common).
+# The apt package lists are removed in the same layer.
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        curl \
+        git \
+        libpq-dev \
+        pkg-config \
+        software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get install -y --no-install-recommends \
+        python3.13 \
+        python3.13-dev \
+        python3.13-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+# uv (and uvx) are copied out of the published uv image and used for every
+# package install below.
+# NOTE(review): `:latest` is a floating tag, so a rebuild may pick up a
+# different uv release — consider pinning a version or digest.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
+
+# Build the venv at the path it will live at in the final image, so shebangs
+# and console-scripts inside the venv reference the correct runtime location
+# once the seed step rsyncs them into the named volume.
+# Note: PATH repeats the literal venv path; writing $VIRTUAL_ENV/bin here
+# would not work, because a variable assigned in an ENV instruction is not
+# yet visible to the other assignments of that same instruction.
+ENV VIRTUAL_ENV=/workspaces/dograh/venv \
+    PATH=/workspaces/dograh/venv/bin:$PATH
+RUN mkdir -p /workspaces/dograh && python3.13 -m venv "$VIRTUAL_ENV"
+
+# Layer 1: API deps. Cache invalidates only when these two files change.
+# The requirements files are bind-mounted for this step only (they are not
+# copied into a layer), and /root/.cache/uv is a BuildKit cache mount.
+RUN --mount=type=bind,source=api/requirements.txt,target=/tmp/req.txt \
+    --mount=type=bind,source=api/requirements.dev.txt,target=/tmp/req.dev.txt \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install -r /tmp/req.txt -r /tmp/req.dev.txt
+
+# Layer 2: pipecat deps. Cache invalidates when pipecat source changes.
+# After installing pipecat, two hardening tweaks (mirrored from api/Dockerfile):
+#   1. Swap opencv-python (pulled by pipecat[webrtc]) for opencv-python-headless.
+#      The non-headless build links against X11/Qt (libxcb*); without those
+#      shared libs in the image, `import cv2` fails at runtime.
+#   2. Pre-download NLTK's punkt_tab tokenizer so pipecat's text processing
+#      doesn't hit the network on first agent run. NLTK auto-finds it under
+#      sys.prefix/nltk_data, so it travels with the venv on COPY/rsync.
+#      raise_on_error=True makes a failed download fail the build; by default
+#      nltk.download() only returns False, which `python -c` ignores, so the
+#      image would silently ship without the tokenizer data.
+RUN --mount=type=bind,source=pipecat,target=/tmp/pipecat,rw \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter,camb,mcp,inworld,smallest]' \
+ && uv pip install --group /tmp/pipecat/pyproject.toml:dev \
+ && uv pip uninstall opencv-python \
+ && uv pip install opencv-python-headless \
+ && python -c "import nltk; nltk.download('punkt_tab', download_dir='/workspaces/dograh/venv/nltk_data', quiet=True, raise_on_error=True)"
+
+
+# =============================================================================
+# Stage 2: runtime devcontainer image
+# Inherits the devcontainer base (vscode user, sudo, etc.) and brings only the
+# populated venv across from the builder stage.
+# =============================================================================
+FROM mcr.microsoft.com/devcontainers/base:ubuntu-24.04
+
+# Workspace tooling plus Python 3.13 from the deadsnakes PPA, the same Python
+# source the builder stage uses. apt caches are cleared in this same layer.
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        ffmpeg \
+        git \
+        jq \
+        libpq-dev \
+        pkg-config \
+        postgresql-client \
+        procps \
+        redis-tools \
+        rsync \
+        software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get install -y --no-install-recommends \
+        python3.13 \
+        python3.13-dev \
+        python3.13-venv \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# uv is still needed at runtime so post-create.sh can do the editable
+# pipecat install (and any ad-hoc `uv pip install` users might run).
+# NOTE(review): `:latest` is a floating tag; consider pinning a version.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
+
+# Bring the populated venv across. At runtime, the named volume in
+# docker-compose.yml shadows /workspaces/dograh/venv; post-create.sh
+# rsyncs from /opt/venv-template into the (initially empty) volume,
+# comparing build-stamps so an image rebuild that changed deps re-seeds.
+COPY --from=venv-builder --chown=vscode:vscode /workspaces/dograh/venv /opt/venv-template
+# The stamp is the build time in epoch seconds (UTC). It is written in its
+# own layer right after the COPY, so with normal layer caching it should only
+# change when the copied venv changes — NOTE(review): confirm for builds run
+# with caching disabled, where every rebuild gets a new stamp.
+RUN date -u +%s > /opt/venv-template/.build-stamp \
+ && chown vscode:vscode /opt/venv-template/.build-stamp
+
+# Same venv location as in the builder stage; at runtime this path is the
+# named volume that post-create.sh seeds from /opt/venv-template.
+ENV VIRTUAL_ENV=/workspaces/dograh/venv \
+    PATH=/workspaces/dograh/venv/bin:$PATH
--- a/.devcontainer/devcontainer-lock.json
+++ b/.devcontainer/devcontainer-lock.json
@ -0,0 +1,9 @@
+{
+  "features": {
+    "ghcr.io/devcontainers/features/node:1": {
+      "version": "1.7.1",
+      "resolved": "ghcr.io/devcontainers/features/node@sha256:8c0de46939b61958041700ee89e3493f3b2e4131a06dc46b4d9423427d06e5f6",
+      "integrity": "sha256:8c0de46939b61958041700ee89e3493f3b2e4131a06dc46b4d9423427d06e5f6"
+    }
+  }
+}
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@ -0,0 +1,70 @@
+{
+  "name": "Dograh",
+  "dockerComposeFile": [
+    "../docker-compose-local.yaml",
+    "docker-compose.yml"
+  ],
+  "service": "workspace",
+  "runServices": [
+    "workspace",
+    "postgres",
+    "redis",
+    "minio"
+  ],
+  "workspaceFolder": "/workspaces/dograh",
+  "shutdownAction": "stopCompose",
+  "overrideCommand": false,
+  "remoteUser": "vscode",
+  "features": {
+    "ghcr.io/devcontainers/features/node:1": {
+      "version": "24"
+    }
+  },
+  "initializeCommand": "git submodule update --init --recursive",
+  "postCreateCommand": "bash .devcontainer/scripts/post-create.sh",
+  "postStartCommand": "bash .devcontainer/scripts/post-start.sh",
+  "forwardPorts": [
+    5432,
+    6379,
+    9000,
+    9001
+  ],
+  "portsAttributes": {
+    "3000": {
+      "label": "Dograh UI",
+      "onAutoForward": "ignore"
+    },
+    "8000": {
+      "label": "Dograh API",
+      "onAutoForward": "ignore"
+    },
+    "5432": {
+      "label": "Postgres"
+    },
+    "6379": {
+      "label": "Redis"
+    },
+    "9000": {
+      "label": "MinIO API"
+    },
+    "9001": {
+      "label": "MinIO Console"
+    }
+  },
+  "customizations": {
+    "vscode": {
+      "settings": {
+        "python.defaultInterpreterPath": "/workspaces/dograh/venv/bin/python",
+        "terminal.integrated.defaultProfile.linux": "bash"
+      },
+      "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance",
+        "ms-python.debugpy",
+        "ms-azuretools.vscode-docker",
+        "dbaeumer.vscode-eslint",
+        "esbenp.prettier-vscode"
+      ]
+    }
+  }
+}
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@ -0,0 +1,38 @@
+# Devcontainer overlay: adds the `workspace` service on top of the services
+# defined in ../docker-compose-local.yaml (both files are listed in
+# devcontainer.json's dockerComposeFile).
+services:
+  workspace:
+    build:
+      # NOTE(review): relative paths here presumably resolve against the
+      # directory of the first compose file listed in devcontainer.json (the
+      # repo root), not against .devcontainer/ — confirm.
+      context: .
+      dockerfile: .devcontainer/Dockerfile
+    # Keep the container alive; work happens in attached terminals.
+    command: sleep infinity
+    # Wait until the backing services report healthy before starting.
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+      minio:
+        condition: service_healthy
+    environment:
+      PIP_DISABLE_PIP_VERSION_CHECK: "1"
+      PYTHONUNBUFFERED: "1"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    init: true
+    # NOTE(review): seccomp/apparmor are disabled and SYS_ADMIN is granted;
+    # the reason is not recorded here — document it or narrow the privileges.
+    security_opt:
+      - seccomp=unconfined
+      - apparmor=unconfined
+    cap_add:
+      - SYS_ADMIN
+    networks:
+      - app-network
+    volumes:
+      # Source tree bind mount, with named volumes layered over the venv and
+      # node_modules paths inside it.
+      - .:/workspaces/dograh:cached
+      - dograh-venv:/workspaces/dograh/venv
+      - dograh-ui-node_modules:/workspaces/dograh/ui/node_modules
+      - dograh-ts-validator-node_modules:/workspaces/dograh/api/mcp_server/ts_validator/node_modules
+    # Published on the host's loopback interface only.
+    ports:
+      - "127.0.0.1:3000:3000"
+      - "127.0.0.1:8000:8000"
+volumes:
+  dograh-venv:
+  dograh-ui-node_modules:
+  dograh-ts-validator-node_modules:
--- a/.devcontainer/scripts/post-create.sh
+++ b/.devcontainer/scripts/post-create.sh
@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="/workspaces/dograh"
+UI_ENV_EXAMPLE="$ROOT_DIR/ui/.env.example"
+UI_ENV_FILE="$ROOT_DIR/ui/.env"
+VENV_PATH="$ROOT_DIR/venv"
+VENV_TEMPLATE="/opt/venv-template"
+
+TOTAL_STEPS=5
+STEP=0
+STEP_START=$SECONDS
+SCRIPT_START=$SECONDS
+
+step() {
+  STEP=$((STEP + 1))
+  STEP_START=$SECONDS
+  printf '\n==> [%d/%d] %s\n' "$STEP" "$TOTAL_STEPS" "$1"
+}
+
+step_done() {
+  printf '    done in %ds\n' "$((SECONDS - STEP_START))"
+}
+
+fail() {
+  printf '\n!! FAILED at step %d/%d (%s) after %ds\n' \
+    "$STEP" "$TOTAL_STEPS" "${1:-unknown}" "$((SECONDS - SCRIPT_START))" >&2
+  exit 1
+}
+trap 'fail "exit $?"' ERR
+
+# Copy $1 to $2 unless $2 already exists as a regular file; prints which of
+# the two happened.
+copy_if_missing() {
+  local src=$1 dst=$2
+  if [[ ! -f "$dst" ]]; then
+    cp "$src" "$dst"
+    echo "Created $dst from $src"
+  else
+    echo "Keeping existing $dst"
+  fi
+}
+
+# Copy an api/.env*.example template to its target, rewriting infra hostnames
+# from `localhost` to the docker service names defined in
+# docker-compose-local.yaml. MINIO_PUBLIC_ENDPOINT stays on localhost — that
+# URL ends up in UI responses and is loaded by the host browser via the
+# forwarded port. No-op if the target already exists.
+copy_env_with_docker_hostnames() {
+  local src=$1
+  local dst=$2
+  if [[ -f "$dst" ]]; then
+    echo "Keeping existing $dst"
+    return
+  fi
+  cp "$src" "$dst"
+  sed -i \
+    -e 's|@localhost:5432|@postgres:5432|g' \
+    -e 's|@localhost:6379|@redis:6379|g' \
+    -e 's|^MINIO_ENDPOINT=localhost:9000|MINIO_ENDPOINT=minio:9000|' \
+    "$dst"
+  echo "Created $dst from $src (rewrote service hostnames for docker network)"
+}
+
+# Seed the venv named volume from the image-baked template, but only when
+# the template's build-stamp differs from what's currently in the volume
+# (first start, or any rebuild that changed requirements.txt / pipecat).
+seed_venv() {
+  local image_stamp venv_stamp
+  image_stamp=$(cat "$VENV_TEMPLATE/.build-stamp" 2>/dev/null || echo missing)
+  venv_stamp=$(cat "$VENV_PATH/.build-stamp" 2>/dev/null || echo none)
+
+  if [[ "$image_stamp" == "$venv_stamp" ]]; then
+    echo "Venv already in sync with image template (stamp=$venv_stamp)"
+    return
+  fi
+
+  echo "Re-seeding venv: image=$image_stamp, volume=$venv_stamp"
+  rsync -a --delete "$VENV_TEMPLATE/" "$VENV_PATH/"
+}
+
+# Work from the repo root so the relative paths used below (npm --prefix ui,
+# --prefix api/mcp_server/ts_validator) resolve.
+cd "$ROOT_DIR"
+
+step "Fixing ownership of named volume mountpoints"
+# Named volumes are created owned by root; postCreateCommand runs as the
+# remote user. Chown the mountpoint roots so the steps below can write.
+sudo chown "$(id -u):$(id -g)" \
+  "$VENV_PATH" \
+  "$ROOT_DIR/ui/node_modules" \
+  "$ROOT_DIR/api/mcp_server/ts_validator/node_modules"
+step_done
+
+step "Seeding venv from image template"
+seed_venv
+step_done
+
+step "Copying example env files into place"
+# Existing env files are kept as-is; only missing ones are created.
+copy_env_with_docker_hostnames "$ROOT_DIR/api/.env.example"      "$ROOT_DIR/api/.env"
+copy_env_with_docker_hostnames "$ROOT_DIR/api/.env.test.example" "$ROOT_DIR/api/.env.test"
+copy_if_missing "$UI_ENV_EXAMPLE" "$UI_ENV_FILE"
+step_done
+
+step "Switching pipecat to editable install from workspace"
+# pipecat's deps are already in the seeded venv as a frozen snapshot from
+# the build context. Re-register editable from the bind-mounted workspace
+# so source edits take effect. --no-deps skips re-resolving transitive
+# dependencies (already present from the seeded image template).
+uv pip install -e "$ROOT_DIR/pipecat" --no-deps
+step_done
+
+step "Installing npm dependencies (ui + ts_validator in parallel)"
+# Both installs run as background jobs; each is waited on separately so a
+# failure is reported with the name of the install that broke.
+npm ci --prefix ui &
+ui_pid=$!
+npm ci --prefix api/mcp_server/ts_validator &
+ts_pid=$!
+wait "$ui_pid" || fail "npm ci ui"
+wait "$ts_pid" || fail "npm ci ts_validator"
+step_done
+
+# Optional personal hook: gitignored script for per-developer tools (e.g.
+# claude, codex, etc.). Runs only if present; safe to omit.
+LOCAL_HOOK="$ROOT_DIR/.devcontainer/install.local.sh"
+if [[ -f "$LOCAL_HOOK" ]]; then
+  printf '\n==> Running local install hook (%s)\n' "$LOCAL_HOOK"
+  bash "$LOCAL_HOOK"
+fi
+
+printf '\nDevcontainer bootstrap complete in %ds.\n' "$((SECONDS - SCRIPT_START))"
--- a/.devcontainer/scripts/post-start.sh
+++ b/.devcontainer/scripts/post-start.sh
@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Intentionally no `http://localhost:PORT` URLs below — VS Code's terminal
+# URL detector adds any printed URL to its auto-forwarded-ports list and
+# then polls it, which produces ECONNREFUSED log spam every ~20s for ports
+# that aren't bound yet. The Ports panel auto-detects bound ports anyway.
+cat <<'EOF'
+Dograh devcontainer ready.
+
+Start the backend:
+  bash scripts/start_services_dev.sh
+
+Start the UI in another terminal:
+  cd ui && npm run dev -- --hostname 0.0.0.0
+
+URLs and other workflow notes: docs/contribution/setup.mdx
+EOF
--- a/.dockerignore
+++ b/.dockerignore
@ -1,4 +1,17 @@
 api/.env
+.git
+.github
+.claude
+**/.claude
+**/.next
+**/__pycache__
+**/*.pyc
+**/node_modules
+.mypy_cache
+.pytest_cache
+.ruff_cache
+.venv
 evals/
 api/mcp_server/ts_validator/node_modules/
 sdk/
+venv
--- a/.github/release.yml
+++ b/.github/release.yml
@ -0,0 +1,25 @@
+# Release-notes configuration: groups pull requests into changelog sections
+# by label. The label names match the conventional-commit types used as PR
+# labels (feat, fix, docs, perf, refactor, chore).
+changelog:
+  # Pull requests carrying any of these labels are left out of the notes.
+  exclude:
+    labels:
+      - chore
+      - "autorelease: pending"
+      - "autorelease: tagged"
+  # One section per entry, rendered under its title.
+  categories:
+    - title: Features
+      labels:
+        - feat
+    - title: Bug Fixes
+      labels:
+        - fix
+    - title: Documentation
+      labels:
+        - docs
+    - title: Performance Improvements
+      labels:
+        - perf
+    - title: Code Refactoring
+      labels:
+        - refactor
+    # Catch-all for pull requests that matched none of the sections above.
+    - title: Other Changes
+      labels:
+        - "*"
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@ -65,21 +65,21 @@ jobs:
        with:
          submodules: recursive

-      - name: Set up Python 3.12
+      - name: Set up Python 3.13
        uses: actions/setup-python@v5
        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: |
-            api/requirements.txt
-            api/requirements.dev.txt
-            pipecat/pyproject.toml
+          python-version: "3.13"

      - name: Set up Node 22 (test_ts_bridge.py shells out to node)
        uses: actions/setup-node@v4
        with:
          node-version: "22"

+      - name: Create Python virtual environment
+        run: |
+          python -m venv .venv
+          echo "${{ github.workspace }}/.venv/bin" >> "$GITHUB_PATH"
+
      - name: Install api and pipecat dependencies
        run: ./scripts/setup_requirements.sh --dev

--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@ -3,24 +3,79 @@ name: Build and Push Docker Images
 on:
  release:
    types: [published]
+  workflow_dispatch:
+    inputs:
+      image_tag:
+        description: "Tag to publish for a manual test run. Defaults to test-<short-sha>."
+        required: false
+        type: string
+      push_latest:
+        description: "Also update :latest. Leave false for test runs."
+        required: false
+        default: false
+        type: boolean
+
+permissions:
+  contents: read
+  packages: write

-# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

-jobs:
-  build:
-    runs-on: ubuntu-latest
-    env:
-      COMMIT_SHA: ${{ github.sha }}
+env:
+  REGISTRY_GHCR: ghcr.io

+jobs:
+  # Computes the image tags once so every downstream job uses the same values:
+  #   short_sha   - first 8 characters of the commit SHA
+  #   version     - release tag without a leading "dograh-" / "v", or the
+  #                 manual image_tag input (default: test-<short_sha>)
+  #   push_latest - "true" for releases, otherwise the push_latest input
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      short_sha: ${{ steps.tags.outputs.short_sha }}
+      version: ${{ steps.tags.outputs.version }}
+      push_latest: ${{ steps.tags.outputs.push_latest }}
+    steps:
+      - name: Compute tags
+        id: tags
+        # Event data and user-supplied inputs are passed through the
+        # environment rather than interpolated into the script text, so a
+        # crafted tag name or image_tag value cannot inject shell commands.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          RELEASE_TAG: ${{ github.event.release.tag_name }}
+          IMAGE_TAG_INPUT: ${{ inputs.image_tag }}
+          PUSH_LATEST_INPUT: ${{ inputs.push_latest }}
+        run: |
+          SHORT_SHA="${GITHUB_SHA::8}"
+
+          if [ "$EVENT_NAME" = "release" ]; then
+            VERSION="$RELEASE_TAG"
+            VERSION="${VERSION#dograh-}"
+            VERSION="${VERSION#v}"
+            PUSH_LATEST="true"
+          else
+            VERSION="$IMAGE_TAG_INPUT"
+            if [ -z "$VERSION" ]; then
+              VERSION="test-${SHORT_SHA}"
+            fi
+            PUSH_LATEST="$PUSH_LATEST_INPUT"
+          fi
+
+          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
+          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+          echo "push_latest=${PUSH_LATEST}" >> "$GITHUB_OUTPUT"
+
+  build:
+    needs: prepare
    strategy:
+      fail-fast: false
      matrix:
        service:
-          - "dograh-api|api/Dockerfile|."
-          - "dograh-ui|ui/Dockerfile|."
-
+          - name: dograh-api
+            dockerfile: api/Dockerfile
+            context: .
+          - name: dograh-ui
+            dockerfile: ui/Dockerfile
+            context: .
+        platform:
+          - name: linux/amd64
+            runner: ubuntu-24.04
+            short: amd64
+          - name: linux/arm64
+            runner: ubuntu-24.04-arm
+            short: arm64
+    runs-on: ${{ matrix.platform.runner }}
    steps:
      - name: Free Disk Space
        uses: jlumbroso/free-disk-space@main
@ -38,90 +93,153 @@ jobs:
        with:
          submodules: true

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4

      - name: Log in to DockerHub
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Log in to GHCR
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
-          registry: ghcr.io
+          registry: ${{ env.REGISTRY_GHCR }}
          username: ${{ secrets.GHCR_USERNAME }}
          password: ${{ secrets.GHCR_TOKEN }}

-      - name: Set build variables
-        id: build-vars
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v7
+        with:
+          context: ${{ matrix.service.context }}
+          file: ${{ matrix.service.dockerfile }}
+          platforms: ${{ matrix.platform.name }}
+          outputs: 'type=image,"name=${{ secrets.DOCKERHUB_USERNAME }}/${{ matrix.service.name }},${{ env.REGISTRY_GHCR }}/${{ secrets.GHCR_USERNAME }}/${{ matrix.service.name }}",push-by-digest=true,name-canonical=true,push=true'
+          cache-from: type=gha,scope=${{ matrix.service.name }}-${{ matrix.platform.short }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}-${{ matrix.platform.short }}
+
+      - name: Export digest
        run: |
-          SERVICE="${{ matrix.service }}"
-          IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
-          SHORT_SHA=${COMMIT_SHA::8}
+          mkdir -p "/tmp/digests/${{ matrix.service.name }}"
+          echo "${{ steps.build.outputs.digest }}" | sed 's/^sha256://' > "/tmp/digests/${{ matrix.service.name }}/${{ matrix.platform.short }}"

-          # Get version from release tag (removes 'dograh-' and 'v' prefixes if present)
-          VERSION="${{ github.event.release.tag_name }}"
-          VERSION="${VERSION#dograh-}"
-          VERSION="${VERSION#v}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digest-${{ matrix.service.name }}-${{ matrix.platform.short }}
+          path: /tmp/digests/${{ matrix.service.name }}/${{ matrix.platform.short }}
+          retention-days: 1

-          echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
-          echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
-          echo "version=${VERSION}" >> $GITHUB_OUTPUT
+  merge:
+    needs:
+      - prepare
+      - build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download API digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digest-dograh-api-*
+          merge-multiple: true
+          path: /tmp/digests/dograh-api

-      - name: Build and Push ${{ matrix.service }}
-        id: docker-build
+      - name: Download UI digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digest-dograh-ui-*
+          merge-multiple: true
+          path: /tmp/digests/dograh-ui
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Log in to DockerHub
+        uses: docker/login-action@v4
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v4
+        with:
+          registry: ${{ env.REGISTRY_GHCR }}
+          username: ${{ secrets.GHCR_USERNAME }}
+          password: ${{ secrets.GHCR_TOKEN }}
+
+      - name: Create manifest lists
+        env:
+          DH_NAMESPACE: ${{ secrets.DOCKERHUB_USERNAME }}
+          GH_NAMESPACE: ${{ env.REGISTRY_GHCR }}/${{ secrets.GHCR_USERNAME }}
+          VERSION: ${{ needs.prepare.outputs.version }}
+          SHORT_SHA: ${{ needs.prepare.outputs.short_sha }}
+          PUSH_LATEST: ${{ needs.prepare.outputs.push_latest }}
        run: |
-          SERVICE="${{ matrix.service }}"
-          IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
-          DOCKERFILE=$(echo "$SERVICE" | cut -d '|' -f2)
-          CONTEXT=$(echo "$SERVICE" | cut -d '|' -f3)
-          SHORT_SHA=${COMMIT_SHA::8}
-          VERSION="${{ steps.build-vars.outputs.version }}"
+          inspect_digests() {
+            service="$1"
+            digest_dir="/tmp/digests/$service"
+            dh_image="$DH_NAMESPACE/$service"
+            gh_image="$GH_NAMESPACE/$service"

-          echo "Building and pushing image: $IMAGE_NAME"
-          echo "Dockerfile: $DOCKERFILE"
-          echo "Context: $CONTEXT"
-          echo "Version: $VERSION"
+            for digest_file in "$digest_dir"/*; do
+              digest="$(cat "$digest_file")"
+              docker buildx imagetools inspect "$dh_image@sha256:$digest" >/dev/null
+              docker buildx imagetools inspect "$gh_image@sha256:$digest" >/dev/null
+            done
+          }

-          echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
-          echo "dockerhub_tag=${{ secrets.DOCKERHUB_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
-          echo "ghcr_tag=ghcr.io/${{ secrets.GHCR_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
-          echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
+          create_manifests() {
+            service="$1"
+            digest_dir="/tmp/digests/$service"
+            dh_image="$DH_NAMESPACE/$service"
+            gh_image="$GH_NAMESPACE/$service"

-          docker buildx build \
-            -f "$DOCKERFILE" \
-            --platform linux/amd64,linux/arm64 \
-            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$VERSION \
-            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
-            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:latest \
-            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$VERSION \
-            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
-            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:latest \
-            --push "$CONTEXT"
+            dh_refs=$(printf "${dh_image}@sha256:%s " $(cat "$digest_dir"/*))
+            gh_refs=$(printf "${gh_image}@sha256:%s " $(cat "$digest_dir"/*))

-      - name: Send Slack notification - Success
-        if: success()
+            dh_tags=(-t "$dh_image:$VERSION" -t "$dh_image:$SHORT_SHA")
+            gh_tags=(-t "$gh_image:$VERSION" -t "$gh_image:$SHORT_SHA")
+
+            if [ "$PUSH_LATEST" = "true" ]; then
+              dh_tags+=(-t "$dh_image:latest")
+              gh_tags+=(-t "$gh_image:latest")
+            fi
+
+            docker buildx imagetools create "${dh_tags[@]}" $dh_refs
+            docker buildx imagetools create "${gh_tags[@]}" $gh_refs
+          }
+
+          inspect_digests dograh-api
+          inspect_digests dograh-ui
+          create_manifests dograh-api
+          create_manifests dograh-ui
+
+  notify:
+    needs:
+      - prepare
+      - merge
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Slack success
+        if: needs.merge.result == 'success'
        uses: slackapi/slack-github-action@v1.26.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        with:
          payload: |
            {
-              "text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }}"
+              "text": "✅ Docker images built for ${{ needs.prepare.outputs.version }} on ${{ github.ref_name }} by ${{ github.actor }}"
            }

-      - name: Send Slack notification - Failure
-        if: failure()
+      - name: Slack failure
+        if: needs.merge.result != 'success'
        uses: slackapi/slack-github-action@v1.26.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        with:
          payload: |
            {
-              "text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
+              "text": "❌ Docker build failed for ${{ needs.prepare.outputs.version }} on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
            }
--- a/.github/workflows/pr-conventional-labeler.yml
+++ b/.github/workflows/pr-conventional-labeler.yml
@ -0,0 +1,54 @@
+name: PR Conventional Labeler
+
+# Labels each PR with its conventional-commit type (feat, fix, docs, perf,
+# refactor, chore) derived from the PR title. These labels drive the changelog
+# categories in .github/release.yml when release-please generates notes.
+# chore is labeled but excluded from the changelog (see .github/release.yml),
+# preserving the previous "chore hidden" behavior.
+
+# NOTE(review): pull_request_target is presumably used so the token can write
+# labels on PRs from forks. The job below checks out no PR code; the script
+# only reads the PR title and labels from the event payload. Keep it that way
+# if steps are added.
+on:
+  pull_request_target:
+    types: [opened, edited, reopened, ready_for_review]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  label:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            // Conventional-commit types we manage as labels.
+            const managedLabels = ['feat', 'fix', 'docs', 'perf', 'refactor', 'chore'];
+
+            const pr = context.payload.pull_request;
+            const title = pr.title || '';
+
+            // Matches: feat:, fix(scope):, perf!:, refactor(api)!: ...
+            const match = title.match(/^([a-zA-Z]+)(\([^)]*\))?!?:/);
+            const type = match ? match[1].toLowerCase() : null;
+            // null when the title has no prefix or the prefix is not a
+            // managed type; in that case no label is added.
+            const target = managedLabels.includes(type) ? type : null;
+
+            const { owner, repo } = context.repo;
+            const issue_number = pr.number;
+            // Label names as they were when the event fired.
+            const current = pr.labels.map(l => l.name);
+
+            // Remove any managed label that no longer matches the title
+            // (e.g. PR retitled from feat: to fix:).
+            for (const label of current) {
+              if (managedLabels.includes(label) && label !== target) {
+                // Best-effort: a failed removal is ignored on purpose.
+                await github.rest.issues
+                  .removeLabel({ owner, repo, issue_number, name: label })
+                  .catch(() => {});
+              }
+            }
+
+            // Apply the matching label if not already present.
+            if (target && !current.includes(target)) {
+              await github.rest.issues.addLabels({
+                owner, repo, issue_number, labels: [target],
+              });
+            }
--- a/.github/workflows/pre-pr-drift-check.yml
+++ b/.github/workflows/pre-pr-drift-check.yml
@ -27,15 +27,10 @@ jobs:
        with:
          submodules: recursive

-      - name: Set up Python 3.12
+      - name: Set up Python 3.13
        uses: actions/setup-python@v5
        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: |
-            api/requirements.txt
-            api/requirements.dev.txt
-            pipecat/pyproject.toml
+          python-version: "3.13"

      - name: Set up Node 22
        uses: actions/setup-node@v4
@ -44,6 +39,11 @@ jobs:
          cache: npm
          cache-dependency-path: ui/package-lock.json

+      - name: Create Python virtual environment
+        run: |
+          python -m venv .venv
+          echo "${{ github.workspace }}/.venv/bin" >> "$GITHUB_PATH"
+
      - name: Install api and pipecat dependencies
        run: ./scripts/setup_requirements.sh --dev

--- a/.gitignore
+++ b/.gitignore
@ -4,6 +4,9 @@ __pycache__
 .env.prod
 .env.test

+# Conductor personal/per-machine overrides (settings.toml IS committed)
+.conductor/settings.local.toml
+
 # logs and run directory on production
 /logs/
 /run/
@ -11,6 +14,7 @@ infrastructure/
 prd/
 .vercel

+.devcontainer/install.local.sh
 venv/
 .venv/
 .playwright-mcp
@ -18,4 +22,8 @@ coturn/
 *.wav
 dograh_pcm_cache/
 node_modules/
-.vscode
+
+# Superpowers brainstorm mockups (local only)
+.superpowers/
+docs/superpowers/
+.gstack/
--- a/.python-version
+++ b/.python-version
@ -1 +1 @@
-3.13.7
+3.13.7
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@ -1,3 +1,3 @@
 {
-    ".": "1.31.0"
+    ".": "1.39.0"
 }
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,129 @@
+// Debug configurations for Dograh contributors.
+//
+// Prerequisites:
+//   - Python interpreter selected in VS Code (devcontainer sets this
+//     automatically; otherwise pick `./venv/bin/python` via the
+//     "Python: Select Interpreter" command).
+//   - api/.env exists (copy it from api/.env.example; the devcontainer
+//     post-create script creates it automatically).
+//   - api/.env.test exists for the test configurations (copy from
+//     api/.env.example and point at a throwaway database).
+//
+// All Python configs set justMyCode=false so the debugger steps into
+// library code (useful for tracing through pipecat/fastapi/etc.).
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "API: Uvicorn (reload)",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "uvicorn",
+            "args": [
+                "api.app:app",
+                "--reload",
+                "--host", "0.0.0.0"
+                // Port comes from UVICORN_PORT in api/.env (per-worktree);
+                // unset -> uvicorn's default 8000. See scripts/worktree-assign-port.sh.
+            ],
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "API: Arq worker (watch)",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "arq",
+            "args": [
+                "api.tasks.arq.WorkerSettings",
+                "--watch", "${workspaceFolder}/api",
+                "--custom-log-dict", "api.tasks.arq.LOG_CONFIG"
+            ],
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "API: Campaign orchestrator",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "api.services.campaign.campaign_orchestrator",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "API: ARI manager",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "api.services.telephony.ari_manager",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "Tests: API (pytest, full suite)",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "pytest",
+            "args": ["tests", "-xvs"],
+            "cwd": "${workspaceFolder}/api",
+            "envFile": "${workspaceFolder}/api/.env.test",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "Tests: API (pytest, current file)",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "pytest",
+            "args": ["${file}", "-xvs"],
+            "cwd": "${workspaceFolder}/api",
+            "envFile": "${workspaceFolder}/api/.env.test",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "Tests: Pipecat (pytest, current file)",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "pytest",
+            "args": ["${file}", "-xvs"],
+            "cwd": "${workspaceFolder}/pipecat",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}/pipecat/src"
+            },
+            "justMyCode": false
+        },
+        {
+            "name": "Python: Current file",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/api/.env",
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            },
+            "justMyCode": false
+        }
+    ]
+}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -0,0 +1,9 @@
+{
+    "python.defaultInterpreterPath": "${workspaceFolder}/venv/bin/python",
+    "git.detectWorktrees": true,
+    "git.worktreeIncludeFiles": [
+        "api/.env",
+        "api/.env.test",
+        "ui/.env"
+    ]
+}
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@ -0,0 +1,53 @@
+{
+    // Tasks that auto-run when a worktree folder is opened (runOptions.runOn:
+    // folderOpen). First time, VS Code asks to "Allow Automatic Tasks in Folder"
+    // (or set task.allowAutomaticTasks: "on" in User settings to skip the prompt).
+    //   - "Assign worktree port"        : fast; sets UVICORN_PORT + UI BACKEND_URLs
+    //   - "Set up worktree environment" : heavy first time (submodule + venv +
+    //                                     deps), instant skip after — run-once via
+    //                                     venv/.worktree-setup-complete. Follow it:
+    //                                       tail -f logs/setup-worktree.log
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "Assign worktree port",
+            "type": "shell",
+            "command": "${workspaceFolder}/scripts/worktree-assign-port.sh",
+            "presentation": {
+                "reveal": "silent",
+                "panel": "shared",
+                "close": true
+            },
+            "runOptions": {
+                "runOn": "folderOpen"
+            },
+            "problemMatcher": []
+        },
+        {
+            "label": "Set up worktree environment",
+            "type": "shell",
+            "command": "${workspaceFolder}/scripts/setup-worktree.sh",
+            "args": ["--if-needed"],
+            "presentation": {
+                "reveal": "silent",
+                "panel": "dedicated"
+            },
+            "runOptions": {
+                "runOn": "folderOpen"
+            },
+            "problemMatcher": []
+        },
+        {
+            // Manual: force a full re-provision, ignoring the run-once sentinel.
+            // Run via: Tasks: Run Task -> "Re-setup worktree (force)".
+            "label": "Re-setup worktree (force)",
+            "type": "shell",
+            "command": "${workspaceFolder}/scripts/setup-worktree.sh",
+            "presentation": {
+                "reveal": "always",
+                "panel": "dedicated"
+            },
+            "problemMatcher": []
+        }
+    ]
+}
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,186 @@
 # Changelog

+## 1.39.0 (2026-06-27)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat(scripts): free trusted HTTPS via sslip.io for public-IP remote i… by @a6kme in https://github.com/dograh-hq/dograh/pull/460
+### Bug Fixes
+* fix: reject misrouted smallwebrtc runs on the telephony websocket by @mvanhorn in https://github.com/dograh-hq/dograh/pull/468
+
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.38.0...dograh-v1.39.0
+
+## 1.38.0 (2026-06-25)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat(scripts): generate REDIS_PASSWORD on setup, plumb through compose by @tecnomanu in https://github.com/dograh-hq/dograh/pull/458
+* feat(storage): support custom S3 endpoint, signature version, and addressing style by @skymoore in https://github.com/dograh-hq/dograh/pull/461
+* feat(twilio): add Answering Machine Detection (AMD) support via telephony config by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/443
+### Bug Fixes
+* fix: support Gemini JSON schema tools by @snvtac in https://github.com/dograh-hq/dograh/pull/463
+### Documentation
+* docs: update Tuner integration to use Dograh provider by @mohamedsalem-bot in https://github.com/dograh-hq/dograh/pull/457
+### Other Changes
+* style(docs): add custom green scrollbar by @Gurkirat-Singh-bit in https://github.com/dograh-hq/dograh/pull/434
+* Add Hostinger (managed-Traefik) deployment files by @a6kme in https://github.com/dograh-hq/dograh/pull/459
+
+## New Contributors
+* @Gurkirat-Singh-bit made their first contribution in https://github.com/dograh-hq/dograh/pull/434
+* @tecnomanu made their first contribution in https://github.com/dograh-hq/dograh/pull/458
+* @skymoore made their first contribution in https://github.com/dograh-hq/dograh/pull/461
+* @snvtac made their first contribution in https://github.com/dograh-hq/dograh/pull/463
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.37.0...dograh-v1.38.0
+
+## 1.37.0 (2026-06-19)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat: add Inworld TTS provider support by @manasseh-zw in https://github.com/dograh-hq/dograh/pull/420
+### Bug Fixes
+* fix(workflow): detect duplicate trigger paths when first node has no id by @Mubashirrrr in https://github.com/dograh-hq/dograh/pull/409
+* fix(qa): tolerate non-dict JSON from QA LLM instead of crashing by @Mubashirrrr in https://github.com/dograh-hq/dograh/pull/408
+* fix(devcontainer): expose UI/API ports for host access by @faisu in https://github.com/dograh-hq/dograh/pull/405
+* fix: disable duplicate trigger nodes in workflow builder by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/402
+* fix(ui): proxy WebSocket signaling upgrade so local web calls work (#425) by @yogi6969 in https://github.com/dograh-hq/dograh/pull/454
+
+## New Contributors
+* @faisu made their first contribution in https://github.com/dograh-hq/dograh/pull/405
+* @yogi6969 made their first contribution in https://github.com/dograh-hq/dograh/pull/454
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.36.0...dograh-v1.37.0
+
+## 1.36.0 (2026-06-18)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat: add Smallest AI TTS and STT provider integration by @harshitajain165 in https://github.com/dograh-hq/dograh/pull/444
+* feat: refreshed user onboarding by @a6kme in https://github.com/dograh-hq/dograh/pull/430
+* feat: add custom sarvam tts voice by @chewwbaka in https://github.com/dograh-hq/dograh/pull/449
+* feat(examples): add load-and-edit workflow SDK example in Python and TypeScript by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/441
+* feat(examples): add multi-node Workflow SDK example in Python and TypeScript by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/440
+### Bug Fixes
+* fix: add pace option in sarvam tts config by @chewwbaka in https://github.com/dograh-hq/dograh/pull/447
+* fix(ui): release microphone stream on call teardown so a second test call works by @Aymenbenpakiss in https://github.com/dograh-hq/dograh/pull/446
+* fix: add language field to CartesiaTTSConfiguration and pass to Cartesia TTS service by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/442
+* fix: sync Smallest AI voice dropdown with selected model by @harshitajain165 in https://github.com/dograh-hq/dograh/pull/451
+### Other Changes
+* Validate workflow status filter to prevent 500 on invalid enum value by @a6kme in https://github.com/dograh-hq/dograh/pull/450
+* allow self-hosters to enable Stack Auth via Dockerfile build args (v33.0) by @neggmmm in https://github.com/dograh-hq/dograh/pull/445
+
+## New Contributors
+* @harshitajain165 made their first contribution in https://github.com/dograh-hq/dograh/pull/444
+* @Aymenbenpakiss made their first contribution in https://github.com/dograh-hq/dograh/pull/446
+* @neggmmm made their first contribution in https://github.com/dograh-hq/dograh/pull/445
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.35.0...dograh-v1.36.0
+
+## 1.35.0 (2026-06-12)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat: add config v2 to simplify billing by @a6kme in https://github.com/dograh-hq/dograh/pull/428
+* feat: add Cartesia Sonic 3.5 as a TTS option by @manasseh-zw in https://github.com/dograh-hq/dograh/pull/423
+* feat: add a start docker script by @a6kme in https://github.com/dograh-hq/dograh/pull/426
+* feat: billing and credit management v2 by @a6kme in https://github.com/dograh-hq/dograh/pull/429
+### Bug Fixes
+* fix(telephony): handle Cloudonix CDR webhooks missing session/disposition by @Mubashirrrr in https://github.com/dograh-hq/dograh/pull/407
+
+## New Contributors
+* @manasseh-zw made their first contribution in https://github.com/dograh-hq/dograh/pull/423
+* @Mubashirrrr made their first contribution in https://github.com/dograh-hq/dograh/pull/407
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.34.0...dograh-v1.35.0
+
+## 1.34.0 (2026-06-03)
+
+<!-- Release notes generated using configuration in .github/release.yml at main -->
+
+## What's Changed
+### Features
+* feat: add mcp guides for various topic and stages for bot building by @a6kme in https://github.com/dograh-hq/dograh/pull/380
+* feat: allow overriding base URL of OpenAI STT and TTS by @developer603 in https://github.com/dograh-hq/dograh/pull/377
+* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) by @vishaldhateria in https://github.com/dograh-hq/dograh/pull/381
+### Bug Fixes
+* fix: support object and array parameters in custom HTTP tools by @mvanhorn in https://github.com/dograh-hq/dograh/pull/373
+* fix(telephony): resolve transfer context via call-sid index instead of KEYS scan by @shiminshen in https://github.com/dograh-hq/dograh/pull/387
+* fix(webrtc): enforce embed allowed-domain policy on public signaling websocket by @shiminshen in https://github.com/dograh-hq/dograh/pull/388
+* fix: use runtime BACKEND_URL for proxying by @a6kme in https://github.com/dograh-hq/dograh/pull/411
+* fix: add CORS preflight handler and ACAO header for embed config endpoint by @nuthalapativarun in https://github.com/dograh-hq/dograh/pull/403
+### Other Changes
+* Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail by @abhaybabbar in https://github.com/dograh-hq/dograh/pull/351
+* fix: make email lookup case-insensitive in get_user_by_email by @developer603 in https://github.com/dograh-hq/dograh/pull/397
+
+## New Contributors
+* @abhaybabbar made their first contribution in https://github.com/dograh-hq/dograh/pull/351
+* @mvanhorn made their first contribution in https://github.com/dograh-hq/dograh/pull/373
+* @developer603 made their first contribution in https://github.com/dograh-hq/dograh/pull/377
+* @vishaldhateria made their first contribution in https://github.com/dograh-hq/dograh/pull/381
+* @shiminshen made their first contribution in https://github.com/dograh-hq/dograh/pull/387
+
+**Full Changelog**: https://github.com/dograh-hq/dograh/compare/dograh-v1.33.0...dograh-v1.34.0
+
+## [1.33.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.32.0...dograh-v1.33.0) (2026-05-31)
+
+
+### Features
+
+* abort immediately on max call duration exceed ([c586d02](https://github.com/dograh-hq/dograh/commit/c586d02d5d7f88a5222ade71a46c2f797c89a754))
+* banner if API is not reachable ([78ba62e](https://github.com/dograh-hq/dograh/commit/78ba62e18558bb6d5407810807301cc611773d42))
+
+
+### Bug Fixes
+
+* fix inbound for Cloudonix with softphone ([e695436](https://github.com/dograh-hq/dograh/commit/e695436fb364446c8b18330d5cb22e4661a4c991))
+* store channel id in gathered context for ARI outbound ([8f10bca](https://github.com/dograh-hq/dograh/commit/8f10bcade32079af126e4e9d83061cd30936fcad))
+
+## [1.32.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.31.0...dograh-v1.32.0) (2026-05-28)
+
+
+### Features
+
+* add copy-to-clipboard button for inbound webhook URL ([#359](https://github.com/dograh-hq/dograh/issues/359)) ([62d3749](https://github.com/dograh-hq/dograh/commit/62d3749219c08437774c851a9f7cae5b0fd3c299))
+* add delete button in an edge in workflow builder ([#366](https://github.com/dograh-hq/dograh/issues/366)) ([9675151](https://github.com/dograh-hq/dograh/commit/9675151549bd9c27e3ba937f458115e9900d326f))
+* add devcontainer based setup  ([#352](https://github.com/dograh-hq/dograh/issues/352)) ([0716582](https://github.com/dograh-hq/dograh/commit/0716582aa7597e2697f72313237c69b2ac0e30db))
+* add google stt and tts. add folders to organize agents ([ad2fa07](https://github.com/dograh-hq/dograh/commit/ad2fa0705882bf6ba48c5ba65cc6bfac90e105cf))
+* add MiniMax provider support (Chat + TTS) ([#309](https://github.com/dograh-hq/dograh/issues/309)) ([0e0d313](https://github.com/dograh-hq/dograh/commit/0e0d3136ca9d2986e76c982a08c957bb62e94a6f))
+* add transcript and recording public URLs in API ([3df5730](https://github.com/dograh-hq/dograh/commit/3df5730076f39c8cb981d1b5b1f4060278e75cb8))
+* add ultravox realtime and fix signature issue in telephony ([#345](https://github.com/dograh-hq/dograh/issues/345)) ([3892b58](https://github.com/dograh-hq/dograh/commit/3892b584861e4a7bec56f03950fedca5171e6079))
+* add xai grok as realtime model ([9135c2d](https://github.com/dograh-hq/dograh/commit/9135c2da1360e4d93d822375011351f2fa67f729))
+* allow overriding base URL of OpenAI models ([#368](https://github.com/dograh-hq/dograh/issues/368)) ([8a58b09](https://github.com/dograh-hq/dograh/commit/8a58b0992d588c199f6ee1f77d959efc16a2a97c))
+* stamp API key into model override at save time to survive global provider change ([#362](https://github.com/dograh-hq/dograh/issues/362)) ([5b61ad6](https://github.com/dograh-hq/dograh/commit/5b61ad645f8af066d98cec9038daa943a2c9bc9e))
+
+
+### Bug Fixes
+
+* abort docker compose when OSS_JWT_SECRET is unset ([#356](https://github.com/dograh-hq/dograh/issues/356)) ([7eecadd](https://github.com/dograh-hq/dograh/commit/7eecadd8d64c77ba4118bb6397f7eac474868bfb))
+* fix 1008 policy violation issue on ElevenLabs ([93edef3](https://github.com/dograh-hq/dograh/commit/93edef35e8a7cce0c0ebe72bbd77510a29312082))
+* fix projection to TS when fetching agent in MCP ([bbb4f91](https://github.com/dograh-hq/dograh/commit/bbb4f91a2747c5a6b36a6675d6823396d2b44790))
+* fix service key validation in OSS ([#371](https://github.com/dograh-hq/dograh/issues/371)) ([b891091](https://github.com/dograh-hq/dograh/commit/b891091e0e2127ff704b8c3cb984b1195483cf71)), closes [#303](https://github.com/dograh-hq/dograh/issues/303)
+* fix vobiz webhook signature validation ([285de92](https://github.com/dograh-hq/dograh/commit/285de925282da9f4213bf802844f20c55127cbd8))
+* harden CORS origin allow list ([6f79bd6](https://github.com/dograh-hq/dograh/commit/6f79bd67eb2f21de9cfb3252f969e8d7f4609c9a)), closes [#322](https://github.com/dograh-hq/dograh/issues/322)
+* run api container as non-root dograh user ([#360](https://github.com/dograh-hq/dograh/issues/360)) ([573dd68](https://github.com/dograh-hq/dograh/commit/573dd68d76a689d49a2ebaca059a366331a9beb9))
+
+
+### Documentation
+
+* add github trending badge in README ([1e8f832](https://github.com/dograh-hq/dograh/commit/1e8f832bcc2174099dea5294e9fae2c4212b1e81))
+* **asterisk-ari:** add required TLS config for Dograh Cloud and reload/codec notes ([9e12d96](https://github.com/dograh-hq/dograh/commit/9e12d96ebbf9ed81c62b978a88f7964d1d0ce3da))
+* clarify Asterisk ARI WebSocket URI for Dograh Cloud vs self-hosted ([#358](https://github.com/dograh-hq/dograh/issues/358)) ([92c8dad](https://github.com/dograh-hq/dograh/commit/92c8dadd34905eb2401a742c75beb031fe586fed))
+* fix asterisk protocol in mintlify websocket client config ([a725fda](https://github.com/dograh-hq/dograh/commit/a725fda274d81e3072de9864cb63cc3eed339392))
+
 ## [1.31.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.30.1...dograh-v1.31.0) (2026-05-21)


--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -4,7 +4,7 @@ Welcome to Dograh AI! ❤️ Thank you for your interest in contributing to the

 Dograh AI is a comprehensive voice agent platform that helps developers build, test, and deploy conversational AI systems with minimal setup. This guide will help you understand the project structure, set up your development environment, and start contributing effectively.

-👉 Join our community → [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ)
+👉 Join our community → [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g)

 ## 🏗️ Project Overview

@ -40,7 +40,7 @@ Please refer to our [Development Setup documentation](https://docs.dograh.com/co
 **Before You Start**

 - Check existing [GitHub Issues](../../issues) for similar work
-- Join our [Slack community](https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ) to discuss your plans
+- Join our [Slack community](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g) to discuss your plans
 - Look for issues tagged `good first issue` for beginner-friendly tasks

 **During Development**
@ -58,6 +58,6 @@ Our Slack community is the heart of Dograh AI development:
 - **Connect**: Meet other contributors and maintainers
 - **Stay Updated**: Learn about contribution opportunities and releases

-👉 **Join us**: [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ)
+👉 **Join us**: [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g)

 Thank you for helping us keep voice AI open and accessible! 🎉
--- a/PRIVATE_DEPLOYMENT_PLAN.md
+++ b/PRIVATE_DEPLOYMENT_PLAN.md
@ -0,0 +1,377 @@
+# Voice AI on GCP — Private VPC Deployment Playbook
+
+A step-by-step guide to deploying a voice AI stack (Dograh + Claude/Gemini + STT/TTS) entirely within a customer's GCP project, with no data leaving their VPC.
+
+---
+
+## Architecture overview
+
+One VPC in the customer's GCP project. Inside it:
+
+- A **GKE cluster** running Dograh (orchestration + Pipecat pipeline).
+- A single **Private Service Connect (PSC) endpoint** to the `all-apis` bundle. This gives private access to *every* `*.googleapis.com` service in one shot — Vertex AI (Claude + Gemini), Cloud Speech-to-Text, Cloud Text-to-Speech, Cloud Storage, KMS, everything.
+- Third-party TTS/STT (ElevenLabs or Deepgram) either as a Vertex Model Garden partner endpoint or as a Helm chart on the same GKE cluster.
+- A **VPC Service Controls perimeter** around the project so leaked credentials can't exfiltrate data outside the perimeter.
+
+```
+┌─────────────────────────── Customer GCP Project ───────────────────────────┐
+│  ┌─────────────────────── VPC: dograh-vpc ─────────────────────────────┐   │
+│  │                                                                     │   │
+│  │   ┌──────────────────┐         ┌──────────────────────────┐         │   │
+│  │   │  GKE Cluster     │  ────▶  │  PSC Endpoint            │ ──▶ Vertex AI
+│  │   │  (Dograh pods)   │         │  192.168.255.230         │ ──▶ Cloud STT
+│  │   │  + optional      │         │  target: all-apis bundle │ ──▶ Cloud TTS
+│  │   │  Deepgram pods   │         └──────────────────────────┘         │   │
+│  │   └──────────────────┘                                              │   │
+│  │                                                                     │   │
+│  └─────────────────────────────────────────────────────────────────────┘   │
+│                                                                            │
+│   VPC Service Controls perimeter (deny egress outside perimeter)           │
+└────────────────────────────────────────────────────────────────────────────┘
+```
+
+Traffic from GKE pods to Vertex AI, STT, and TTS resolves via private DNS to the PSC IP and stays entirely on Google's private backbone. The model inference itself still runs on Vertex's managed GPUs, but no audio or prompt content is accessible to Google or Anthropic.
+
+---
+
+## Phase 1 — VPC and PSC endpoint to Google APIs
+
+This is the single piece of plumbing that gives Dograh private access to Vertex AI (Claude/Gemini), Cloud STT, and Cloud TTS.
+
+### 1.1 Set variables and enable APIs
+
+```bash
+export PROJECT_ID=$(gcloud config get-value project)
+export NETWORK=dograh-vpc
+export REGION=us-east1
+export PSC_IP=192.168.255.230   # any unused internal IP outside the VPC's subnet ranges
+
+gcloud services enable \
+  compute.googleapis.com \
+  aiplatform.googleapis.com \
+  speech.googleapis.com \
+  texttospeech.googleapis.com \
+  dns.googleapis.com \
+  servicedirectory.googleapis.com \
+  container.googleapis.com
+```
+
+### 1.2 Create VPC and subnet
+
+```bash
+gcloud compute networks create $NETWORK \
+  --subnet-mode=custom \
+  --bgp-routing-mode=global \
+  --mtu=1460
+
+gcloud compute networks subnets create dograh-subnet \
+  --network=$NETWORK \
+  --range=10.0.0.0/20 \
+  --region=$REGION \
+  --enable-private-ip-google-access
+```
+
+The `--enable-private-ip-google-access` flag is required for VMs without external IPs to reach Google APIs through the PSC endpoint.
+
+### 1.3 Reserve internal IP and create the PSC forwarding rule
+
+```bash
+gcloud compute addresses create dograh-psc-ip \
+  --global \
+  --purpose=PRIVATE_SERVICE_CONNECT \
+  --addresses=$PSC_IP \
+  --network=$NETWORK
+
+gcloud compute forwarding-rules create dograhpscapis \
+  --global \
+  --network=$NETWORK \
+  --address=dograh-psc-ip \
+  --target-google-apis-bundle=all-apis
+```
+
+### 1.4 Wire up private DNS
+
+```bash
+gcloud dns managed-zones create googleapis-private \
+  --description="Private DNS for googleapis.com" \
+  --dns-name="googleapis.com." \
+  --visibility="private" \
+  --networks=$NETWORK
+
+gcloud dns record-sets create "googleapis.com." \
+  --zone=googleapis-private \
+  --type=A \
+  --ttl=300 \
+  --rrdatas=$PSC_IP
+
+gcloud dns record-sets create "*.googleapis.com." \
+  --zone=googleapis-private \
+  --type=CNAME \
+  --ttl=300 \
+  --rrdatas="googleapis.com."
+```
+
+### 1.5 Verify
+
+From any VM inside the VPC:
+
+```bash
+dig aiplatform.googleapis.com +short
+# Should return 192.168.255.230, not a Google public IP
+```
+
+---
+
+## Phase 2 — Enable Claude and Gemini in Model Garden
+
+These run on Vertex's managed infrastructure. The PSC endpoint from Phase 1 gives the private network path.
+
+In the Cloud Console → **Vertex AI** → **Model Garden**:
+
+1. Search "Claude", select Claude Opus 4.7 (or whichever tier the customer needs), click **Enable**, accept Anthropic's terms.
+2. Search "Gemini", enable Gemini 3 Pro (typically enabled by default).
+
+> **Note:** Model Garden requires a one-time terms acceptance per model per project, and cannot be automated via Terraform or gcloud. Document this as a manual onboarding step.
+
+The Python clients then work over the PSC endpoint with no code changes:
+
+```python
+from anthropic import AnthropicVertex
+from google import genai
+
+claude = AnthropicVertex(region="us-east5", project_id=PROJECT_ID)
+gemini = genai.Client(vertexai=True, location="us-east1", project=PROJECT_ID)
+```
+
+### Region selection
+
+- For **data residency**, use regional endpoints (`us-east5`, `europe-west1`, etc.) instead of `global`. ~10% pricing premium, but requests are guaranteed to stay in that region.
+- For maximum availability and feature freshness, use `global`.
+
+---
+
+## Phase 3 — Deploy Dograh on GKE in the same VPC
+
+### 3.1 Create a private GKE cluster
+
+```bash
+gcloud container clusters create dograh-cluster \
+  --region=$REGION \
+  --network=$NETWORK \
+  --subnetwork=dograh-subnet \
+  --enable-private-nodes \
+  --enable-private-endpoint \
+  --master-ipv4-cidr=172.16.0.0/28 \
+  --enable-ip-alias \
+  --num-nodes=3 \
+  --workload-pool=$PROJECT_ID.svc.id.goog
+
+gcloud container clusters get-credentials dograh-cluster --region=$REGION
+```
+
+### 3.2 Install Dograh via Helm
+
+```bash
+helm install dograh ./charts/dograh -n dograh --create-namespace
+```
+
+Dograh pods inherit the VPC's DNS, so any call to `aiplatform.googleapis.com`, `speech.googleapis.com`, or `texttospeech.googleapis.com` automatically routes through the PSC endpoint.
+
+### 3.3 Configure Workload Identity
+
+This lets pods authenticate to Vertex AI without static service account keys.
+
+```bash
+# Kubernetes service account
+kubectl create serviceaccount dograh-ksa -n dograh
+
+# GCP service account
+gcloud iam service-accounts create dograh-gsa
+
+# Grant Vertex AI access
+gcloud projects add-iam-policy-binding $PROJECT_ID \
+  --member="serviceAccount:dograh-gsa@$PROJECT_ID.iam.gserviceaccount.com" \
+  --role="roles/aiplatform.user"
+
+# Bind KSA to GSA
+gcloud iam service-accounts add-iam-policy-binding \
+  dograh-gsa@$PROJECT_ID.iam.gserviceaccount.com \
+  --member="serviceAccount:$PROJECT_ID.svc.id.goog[dograh/dograh-ksa]" \
+  --role="roles/iam.workloadIdentityUser"
+
+# Annotate KSA
+kubectl annotate serviceaccount dograh-ksa -n dograh \
+  iam.gke.io/gcp-service-account=dograh-gsa@$PROJECT_ID.iam.gserviceaccount.com
+```
+
+---
+
+## Phase 4 — TTS and STT, pick your path
+
+### Option A — All Google native (simplest)
+
+Cloud Speech-to-Text v2 (Chirp 2) and Cloud Text-to-Speech (Chirp 3 HD voices) are already reachable through the PSC endpoint from Phase 1. **Zero additional setup.** Quality is solid for most CX use cases, though TTS expressiveness lags ElevenLabs and Cartesia.
+
+### Option B — Deepgram self-hosted on GKE
+
+Runs entirely in the customer's VPC, no egress to Deepgram cloud after licensing. Pure Helm chart.
+
+**Prerequisites:** Engage Deepgram's enterprise sales to provision container image access and distribution credentials.
+
+```bash
+# GPU node pool (L4s are the sweet spot for Deepgram)
+gcloud container node-pools create gpu-pool \
+  --cluster=dograh-cluster \
+  --region=$REGION \
+  --machine-type=g2-standard-12 \
+  --accelerator=type=nvidia-l4,count=1 \
+  --num-nodes=2 \
+  --node-locations=$REGION-b
+
+# Optional: mirror Deepgram images from Quay to private Artifact Registry
+gcloud artifacts repositories create deepgram \
+  --repository-format=docker \
+  --location=$REGION
+
+# Install via Helm
+helm repo add deepgram https://deepgram.github.io/self-hosted-resources
+helm install dg-stt deepgram/deepgram-self-hosted \
+  -f values.yaml \
+  -n deepgram \
+  --create-namespace
+```
+
+**Constraints:**
+- NVIDIA GPUs only.
+- Dedicated GPUs only — no MIG or fractional allocation.
+- Linux x86-64 only.
+
+### Option C — ElevenLabs via Vertex AI Model Garden
+
+ElevenLabs deploys as a partner model on Vertex AI, accessed via the same `aiplatform.googleapis.com` PSC path you already have. Setup is sales-led, not self-serve through Marketplace: contact ElevenLabs enterprise, who provision the partner model in the customer's project; you then call it via the standard Vertex Prediction API.
+
+---
+
+## Phase 5 — VPC Service Controls perimeter
+
+This is the lock that turns "data flows over private network" into "data *cannot* leave the perimeter, even if a service account key is leaked."
+
+```bash
+# Get the org's access policy ID
+gcloud access-context-manager policies list --organization=YOUR_ORG_ID
+
+# Create the perimeter
+gcloud access-context-manager perimeters create dograh_perimeter \
+  --title="Dograh VPC-SC Perimeter" \
+  --resources=projects/$(gcloud projects describe $PROJECT_ID --format='value(projectNumber)') \
+  --restricted-services=aiplatform.googleapis.com,speech.googleapis.com,texttospeech.googleapis.com,storage.googleapis.com,cloudkms.googleapis.com \
+  --policy=YOUR_POLICY_ID
+```
+
+Any call to a restricted API from outside the perimeter (e.g., a developer laptop with leaked credentials) is denied at the API layer with `PERMISSION_DENIED` / `violationReason: VPC_SERVICE_CONTROLS`.
+
+### CMEK (recommended)
+
+Enable Customer-Managed Encryption Keys on the project for at-rest encryption with customer-controlled keys. This is the line-item that passes "data encrypted with our keys" in security reviews.
+
+```bash
+# Create a key ring and key
+gcloud kms keyrings create dograh-keyring --location=$REGION
+gcloud kms keys create dograh-key \
+  --keyring=dograh-keyring \
+  --location=$REGION \
+  --purpose=encryption
+```
+
+Then attach the key to resources as needed (Vertex AI, Cloud Storage, GKE etcd, etc.).
+
+---
+
+## Phase 6 — Verification checklist
+
+Run these from a Dograh pod inside the cluster:
+
+```bash
+# DNS resolves to PSC IP, not public Google IPs
+dig aiplatform.googleapis.com +short
+dig speech.googleapis.com +short
+dig texttospeech.googleapis.com +short
+
+# Vertex AI call succeeds over PSC
+curl -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+  "https://us-east1-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/us-east1/publishers/google/models/gemini-3-pro:generateContent" \
+  -d '{"contents":[{"role":"user","parts":[{"text":"ping"}]}]}'
+
+# Cloud STT reachable
+curl -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+  "https://speech.googleapis.com/v2/projects/$PROJECT_ID/locations/global/recognizers"
+
+# From outside the perimeter (e.g., laptop with valid creds), the Vertex call
+# should return PERMISSION_DENIED with violationReason: VPC_SERVICE_CONTROLS
+```
+
+---
+
+## Two things to flag in the customer conversation
+
+### 1. PSC endpoint ≠ model runs in their VPC
+
+With this setup, audio and prompts travel from GKE pods to Vertex AI over Google's private backbone — they never touch the public internet, and neither Anthropic nor Google has access to the content. But the model inference itself runs on Vertex's managed GPUs in Google's infrastructure.
+
+For roughly 95% of enterprise security reviews, this is acceptable and accurately described as "in our VPC." If the customer is a defense, sovereign-cloud, or air-gapped buyer who requires the GPU itself to be in their data center, skip this playbook entirely and use **Google Distributed Cloud air-gapped** with Gemini — a different motion (hardware-shipped, sales-led, Dell + NVIDIA Blackwell appliance).
+
+### 2. Model Garden enablement is manual
+
+Claude and other partner models require a one-time terms acceptance in the Cloud Console that cannot be automated via Terraform or gcloud. For multi-customer rollouts, document this as a manual step in onboarding.
+
+---
+
+## Quick reference: full command sequence
+
+```bash
+# 1. Variables
+export PROJECT_ID=$(gcloud config get-value project)
+export NETWORK=dograh-vpc
+export REGION=us-east1
+export PSC_IP=192.168.255.230
+
+# 2. APIs
+gcloud services enable compute.googleapis.com aiplatform.googleapis.com \
+  speech.googleapis.com texttospeech.googleapis.com dns.googleapis.com \
+  servicedirectory.googleapis.com container.googleapis.com
+
+# 3. VPC
+gcloud compute networks create $NETWORK --subnet-mode=custom --bgp-routing-mode=global
+gcloud compute networks subnets create dograh-subnet --network=$NETWORK \
+  --range=10.0.0.0/20 --region=$REGION --enable-private-ip-google-access
+
+# 4. PSC endpoint
+gcloud compute addresses create dograh-psc-ip --global \
+  --purpose=PRIVATE_SERVICE_CONNECT --addresses=$PSC_IP --network=$NETWORK
+gcloud compute forwarding-rules create dograh-psc-googleapis --global \
+  --network=$NETWORK --address=dograh-psc-ip --target-google-apis-bundle=all-apis
+
+# 5. Private DNS
+gcloud dns managed-zones create googleapis-private --dns-name="googleapis.com." \
+  --visibility="private" --networks=$NETWORK --description="Private DNS"
+gcloud dns record-sets create "googleapis.com." --zone=googleapis-private \
+  --type=A --ttl=300 --rrdatas=$PSC_IP
+gcloud dns record-sets create "*.googleapis.com." --zone=googleapis-private \
+  --type=CNAME --ttl=300 --rrdatas="googleapis.com."
+
+# 6. GKE
+gcloud container clusters create dograh-cluster --region=$REGION \
+  --network=$NETWORK --subnetwork=dograh-subnet \
+  --enable-private-nodes --enable-private-endpoint \
+  --master-ipv4-cidr=172.16.0.0/28 --enable-ip-alias --num-nodes=3 \
+  --workload-pool=$PROJECT_ID.svc.id.goog
+
+# 7. Manual step: enable Claude + Gemini in Model Garden via console
+
+# 8. VPC-SC perimeter (after getting org policy ID)
+gcloud access-context-manager perimeters create dograh-perimeter \
+  --title="Dograh VPC-SC Perimeter" \
+  --resources=projects/$(gcloud projects describe $PROJECT_ID --format='value(projectNumber)') \
+  --restricted-services=aiplatform.googleapis.com,speech.googleapis.com,texttospeech.googleapis.com,storage.googleapis.com,cloudkms.googleapis.com \
+  --policy=YOUR_POLICY_ID
+```
--- a/README.ja-JP.md
+++ b/README.ja-JP.md
@ -0,0 +1,203 @@
+# Dograh AI
+
+> 💡 **Notice**: This documentation is community-maintained. If you spot any translation inaccuracies or content that has drifted from the English version, please feel free to open a PR!
+>
+> 💡 **注記**: このドキュメントはコミュニティによって保守されています。翻訳の不正確さや英語版からの内容のずれを見つけた場合は、ぜひ PR を作成してください。
+
+**オープンソースでセルフホスト可能な Vapi / Retell の代替手段** -- ドラッグ&ドロップのワークフロービルダーで本番向け音声エージェントを構築できます。ゼロから 2 分以内で動作するボットを立ち上げられます。
+
+<p align="center">
+  <a href="https://app.dograh.com">
+    <img src="https://img.shields.io/badge/▶_クラウド版を試す-app.dograh.com-2563eb?style=for-the-badge" alt="クラウド版を試す">
+  </a>
+  &nbsp;
+  <a href="#-クイックスタート">
+    <img src="https://img.shields.io/badge/⚡_60秒でセルフホスト-コマンド1つ-111827?style=for-the-badge" alt="60秒でセルフホスト">
+  </a>
+  &nbsp;
+  <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">
+    <img src="https://img.shields.io/badge/💬_Slackに参加-コミュニティ-4A154B?style=for-the-badge&logo=slack" alt="Slackに参加">
+  </a>
+</p>
+
+<p align="center">
+  <a href="https://docs.dograh.com">📖 ドキュメント</a> &nbsp;·&nbsp;
+  <a href="LICENSE">📜 BSD 2-Clause</a> &nbsp;·&nbsp;
+  <a href="README.md">🌐 English</a> &nbsp;·&nbsp;
+  <a href="README.zh-CN.md">🌐 中文</a>
+</p>
+
+<p align="center">
+  <img src="docs/images/hero.gif" alt="Dograh の動作デモ -- ワークフローを構築し、音声エージェントを起動して会話する" width="80%">
+</p>
+
+- **100% オープンソース**でセルフホスト可能 -- Vapi や Retell と違い、ベンダーロックインはありません
+- **完全な制御と透明性** -- すべてのコードが公開され、LLM / TTS / STT の統合も柔軟に差し替えられます
+- **YC 卒業生と事業売却を経験した創業者が保守**し、音声 AI をオープンに保つことに取り組んでいます
+
+<p align="center">
+  <a href="https://trendshift.io/repositories/31007" target="_blank"><img src="https://trendshift.io/api/badge/repositories/31007" alt="dograh-hq%2Fdograh | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</p>
+
+## 🎥 メディア掲載
+
+<div align="center">
+  <a href="https://www.youtube.com/watch?v=xD9JEvfCH9k">
+    <img src="https://img.youtube.com/vi/xD9JEvfCH9k/maxresdefault.jpg" alt="Better Stack による Dograh 紹介" width="80%" style="border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
+  </a>
+  <br>
+  <em><strong>Better Stack</strong> による実践レビュー -- Dograh を詳しく紹介</em>
+</div>
+
+<details>
+<summary>📺 2 分のプロダクト紹介動画を見たい場合はこちら。</summary>
+
+<div align="center">
+  <a href="https://youtu.be/9gPneyf9M9w">
+    <img src="docs/images/video_thumbnail_1.png" alt="Dograh AI のデモ動画を見る" width="70%" style="border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);">
+  </a>
+</div>
+
+</details>
+
+## ⚖️ Dograh vs Vapi vs Retell
+
+音声 AI プラットフォームを評価しているチームに向けて、重要な観点を率直に比較します。
+
+|  | **Dograh** | **Vapi** | **Retell** |
+|---|---|---|---|
+| **ライセンス** | BSD 2-Clause (オープンソース) | プロプライエタリ | プロプライエタリ |
+| **セルフホスト** | ✅ 可能 -- Docker コマンド 1 つ | ❌ SaaS のみ | ❌ SaaS のみ |
+| **料金** | 無料(セルフホスト)・従量課金(クラウド) | 分単位課金の SaaS | 分単位課金の SaaS |
+| **独自 LLM / STT / TTS の利用** | ✅ 任意のプロバイダー、または Dograh 標準スタック | 提供範囲内で設定可能 | 提供範囲内で設定可能 |
+| **ソースコードレベルのカスタマイズ** | ✅ すべてのコードを自由に変更可能 | ❌ クローズドソース | ❌ クローズドソース |
+| **データレジデンシー** | 自社インフラ、自社ルール | ベンダーのクラウド | ベンダーのクラウド |
+| **ベンダーロックイン** | なし | あり | あり |
+
+
+## 🚀 クイックスタート
+
+##### ローカルマシンに Dograh をダウンロードしてセットアップ
+
+> **注記**
+> 製品改善のため、匿名の利用状況データを収集します。無効にするには、起動スクリプトを実行する前に `ENABLE_TELEMETRY=false` を設定してください。
+
+> **注記**
+> リモートサーバーでプラットフォームを実行したい場合は、[ドキュメント](https://docs.dograh.com/deployment/docker#option-2:-remote-server-deployment)を参照してください。
+
+```bash
+curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && curl -o start_docker.sh https://raw.githubusercontent.com/dograh-hq/dograh/main/scripts/start_docker.sh && chmod +x start_docker.sh && ./start_docker.sh
+```
+
+> **⚡ AI エージェントにセットアップを任せたいですか?**
+> **Claude Code** または **Codex** を使っている場合は、公式の [Dograh セットアップ skill](https://github.com/dograh-hq/dograh-plugins) をインストールすると、インストール、設定、トラブルシューティングをエージェントに任せられます。OS を検出し、適切なデプロイ方法を選び、Dograh 付属のセットアップスクリプトを実行して結果を検証します。
+>
+> ```text
+> # Claude Code の場合
+> /plugin marketplace add dograh-hq/dograh-plugins
+> /plugin install dograh@dograh
+> ```
+>
+> その後、新しいセッションを開始して _"set up Dograh"_ と依頼するか、`/dograh-setup` を実行してください。Codex も対応しています。詳しくは[プラグインリポジトリ](https://github.com/dograh-hq/dograh-plugins#install)を参照してください。
+
+> **注記**
+> 初回起動では、すべてのイメージをダウンロードするため 2-3 分かかる場合があります。起動後、http://localhost:3010 を開くと最初の AI 音声アシスタントを作成できます。
+> よくある問題と解決策は 🔧 **[トラブルシューティング](docs/getting-started/troubleshooting.mdx)** を参照してください。
+
+### 🎙️ 最初の音声ボット
+
+1. ブラウザで [http://localhost:3010](http://localhost:3010) を開きます。
+2. **Inbound(着信)** または **Outbound(発信)** を選び、ボットに名前を付けます(例: _リード判定_)。続けて用途を 5-10 語で説明します(例: _保険フォーム送信者の購入意向を確認_)。
+3. **Web Call** をクリックすると、ボットと直接会話できます。
+
+> 🔑 **API キーは不要です。** Dograh には自動生成されるキーと、組み込みの LLM / TTS / STT スタックが付属しています。必要に応じて、独自の LLM、TTS、STT、または Twilio、Vonage、Telnyx などの電話連携プロバイダーをいつでも接続できます。
+
+## 機能
+
+### 音声機能
+
+- 電話連携: Twilio、Vonage、Vobiz、Cloudonix などを標準搭載(他のプロバイダーも簡単に追加可能)。有人オペレーターへの転送にも対応
+- 言語: 英語をサポート(他言語へ拡張可能)
+- カスタムモデル: 独自の TTS / STT モデルを持ち込み可能
+- リアルタイム処理: 低遅延の音声インタラクション
+
+### 開発者体験
+
+- ゼロ設定で開始: API キーを自動生成し、すぐにテスト可能
+- Python ベース: Python で構築されており、カスタマイズしやすい
+- Docker ファースト: コンテナ化により一貫したデプロイが可能
+- モジュラー構成: 必要に応じて各コンポーネントを差し替え可能
+
+### テストと品質
+
+- **テストモード**: 本番通話や本番データに影響を与えず、公開前にエージェントをエンドツーエンドで試せます
+- **ダッシュボード内 Web 通話**: 電話連携を設定しなくても、構築中にボットと直接会話できます
+- **QA ノード**: 他のノードに含まれるプロンプト品質を分析する組み込みワークフローノード
+
+## デプロイ方法
+
+### ローカル開発
+
+[ローカルセットアップ](https://docs.dograh.com/contribution/setup)を参照してください。
+
+### セルフホストデプロイ
+
+リモートサーバーへのデプロイや HTTPS 設定を含む詳しい手順は、[Docker デプロイガイド](https://docs.dograh.com/deployment/docker)を参照してください。
+
+### クラウド版
+
+マネージドクラウド版は [https://www.dograh.com](https://www.dograh.com/) から利用できます。
+
+## 📚 ドキュメント
+
+完全なドキュメントは [https://docs.dograh.com](https://docs.dograh.com/) を参照してください。
+
+## 📦 SDKs
+
+- **Python SDK** -- [pypi.org/project/dograh-sdk](https://pypi.org/project/dograh-sdk/)
+- **Node SDK** -- [npmjs.com/package/@dograh/sdk](https://www.npmjs.com/package/@dograh/sdk)
+
+## 🤝 コミュニティとサポート
+
+> 👋 **Better Stack の動画から来ましたか?** [固定された GitHub Discussion](https://github.com/orgs/dograh-hq/discussions/291) にユースケースを投稿してください。すべての返信を確認し、創業チームが初期ユーザーを直接オンボーディングします。
+
+- **Slack** -- Dograh AI のコラボレーションの中心です。メンテナーとつながり、実装前に機能を相談し、セットアップの支援を受け、コントリビューション活動の最新情報を追えます。
+- **GitHub Discussions** -- ユースケースを共有し、質問し、ワークフローのレシピを交換できます。
+- **GitHub Issues** -- バグ報告や機能リクエストに利用してください。
+
+👉 参加はこちら → [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g)
+
+## 🙌 コントリビューション
+
+コントリビューションを歓迎します。Dograh AI は 100% オープンソースであり、今後もそうあり続けます。
+
+### はじめに
+
+- このリポジトリを Fork する
+- 機能ブランチを作成する(`git checkout -b feature/AmazingFeature`)
+- 変更をコミットする(`git commit -m 'Add some AmazingFeature'`)
+- ブランチへプッシュする(`git push origin feature/AmazingFeature`)
+- Pull Request を作成する
+
+## ⭐ Star 履歴
+
+<a href="https://star-history.com/#dograh-hq/dograh&Date">
+  <img src="https://api.star-history.com/svg?repos=dograh-hq/dograh&type=Date" alt="Dograh の Star 履歴" width="80%">
+</a>
+
+## 📄 ライセンス
+
+Dograh AI は [BSD 2-Clause License](LICENSE) のもとで公開されています。Dograh AI の構築に使われたプロジェクトと同じライセンスであり、互換性と、利用・変更・配布の自由を確保しています。
+
+## 🏢 私たちについて
+
+**Dograh** (Zansat Technologies Private Limited) が ❤️ を込めて開発しています。
+創業チームは YC 卒業生と事業売却を経験した創業者で構成され、音声 AI をオープンで誰もが利用できるものに保つことに取り組んでいます。
+
+<br><br><br>
+
+  <p align="center">
+    <a href="https://github.com/dograh-hq/dograh/stargazers">⭐ GitHub で Star する</a> |
+    <a href="https://app.dograh.com">☁️ クラウド版を試す</a> |
+    <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">💬 Slack に参加</a>
+  </p>
--- a/README.md
+++ b/README.md
@ -11,7 +11,7 @@
    <img src="https://img.shields.io/badge/⚡_Self--host_in_60s-One_command-111827?style=for-the-badge" alt="Self-host in 60s">
  </a>
  &nbsp;
-  <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ">
+  <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">
    <img src="https://img.shields.io/badge/💬_Join_Slack-Community-4A154B?style=for-the-badge&logo=slack" alt="Join Slack">
  </a>
 </p>
@ -19,7 +19,8 @@
 <p align="center">
  <a href="https://docs.dograh.com">📖 Docs</a> &nbsp;·&nbsp;
  <a href="LICENSE">📜 BSD 2-Clause</a> &nbsp;·&nbsp;
-  <a href="README.zh-CN.md">🌐 中文</a>
+  <a href="README.zh-CN.md">🌐 中文</a> &nbsp;·&nbsp;
+  <a href="README.ja-JP.md">🌐 日本語</a>
 </p>

 <p align="center">
@ -30,6 +31,10 @@
 - **Full control & transparency** — every line of code is open, with flexible LLM / TTS / STT integration
 - **Maintained by YC alumni and exit founders**, committed to keeping voice AI open

+<p align="center">
+  <a href="https://trendshift.io/repositories/31007" target="_blank"><img src="https://trendshift.io/api/badge/repositories/31007" alt="dograh-hq%2Fdograh | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</p>
+
 ## 🎥 Featured

 <div align="center">
@ -71,18 +76,29 @@ An honest comparison on the axes that matter most to teams evaluating voice AI p
 ##### Download and setup Dograh on your Local Machine

 > **Note**
-> We collect anonymous usage data to improve the product. You can opt out by setting the `ENABLE_TELEMETRY` to `false` in the below command.
+> We collect anonymous usage data to improve the product. You can opt out by setting `ENABLE_TELEMETRY=false` before running the startup script.

 > **Note**
 > If you wish to run the platform on a remote server instead, checkout our [Documentation](https://docs.dograh.com/deployment/docker#option-2:-remote-server-deployment)

 ```bash
-curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && REGISTRY=ghcr.io/dograh-hq ENABLE_TELEMETRY=true docker compose up --pull always
+curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && curl -o start_docker.sh https://raw.githubusercontent.com/dograh-hq/dograh/main/scripts/start_docker.sh && chmod +x start_docker.sh && ./start_docker.sh
 ```

+> **⚡ Prefer an AI agent to set it up for you?**
+> If you use **Claude Code** or **Codex**, install the official [Dograh setup skill](https://github.com/dograh-hq/dograh-plugins) and let your agent handle installation, configuration, and troubleshooting — it detects your OS, picks the right deploy path, runs Dograh's own setup scripts, and verifies the result.
+>
+> ```text
+> # In Claude Code
+> /plugin marketplace add dograh-hq/dograh-plugins
+> /plugin install dograh@dograh
+> ```
+>
+> Then start a new session and ask it to _"set up Dograh"_ (or run `/dograh-setup`). Codex is supported too — see the [plugin repo](https://github.com/dograh-hq/dograh-plugins#install).
+
 > **Note**
 > First startup may take 2-3 minutes to download all images. Once running, open http://localhost:3010 to create your first AI voice assistant!
-> For common issues and solutions, see 🔧 **[Troubleshooting](docs/troubleshooting.md)**.
+> For common issues and solutions, see 🔧 **[Troubleshooting](docs/getting-started/troubleshooting.mdx)**.

 ### 🎙️ Your First Voice Bot

@ -145,7 +161,7 @@ You can go to [https://docs.dograh.com](https://docs.dograh.com/) for our docume
 - **GitHub Discussions** — share use cases, ask questions, swap workflow recipes.
 - **GitHub Issues** — report bugs or request features.

-👉 Join us → [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ)
+👉 Join us → [Dograh Community Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g)

 ## 🙌 Contributing

@ -179,5 +195,5 @@ Founded by YC alumni and exit founders committed to keeping voice AI open and ac
  <p align="center">
    <a href="https://github.com/dograh-hq/dograh/stargazers">⭐ Star us on GitHub</a> |
    <a href="https://app.dograh.com">☁️ Try Cloud Version</a> |
-    <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ">💬 Join Slack</a>
+    <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">💬 Join Slack</a>
  </p>
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@ -15,7 +15,7 @@
    <img src="https://img.shields.io/badge/⚡_60_秒自托管-一行命令-111827?style=for-the-badge" alt="60 秒自托管">
  </a>
  &nbsp;
-  <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ">
+  <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">
    <img src="https://img.shields.io/badge/💬_加入_Slack-社区-4A154B?style=for-the-badge&logo=slack" alt="加入 Slack">
  </a>
 </p>
@ -23,7 +23,8 @@
 <p align="center">
  <a href="https://docs.dograh.com">📖 文档</a> &nbsp;·&nbsp;
  <a href="LICENSE">📜 BSD 2-Clause</a> &nbsp;·&nbsp;
-  <a href="README.md">🌐 English</a>
+  <a href="README.md">🌐 English</a> &nbsp;·&nbsp;
+  <a href="README.ja-JP.md">🌐 日本語</a>
 </p>

 <p align="center">
@ -84,9 +85,20 @@
 curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && REGISTRY=ghcr.io/dograh-hq ENABLE_TELEMETRY=true docker compose up --pull always
 ```

+> **⚡ 想让 AI 智能体帮你完成部署?**
+> 如果你使用 **Claude Code** 或 **Codex**,可以安装官方的 [Dograh 部署技能(skill)](https://github.com/dograh-hq/dograh-plugins),让智能体替你完成安装、配置与排障——它会识别你的操作系统、选择合适的部署方式、运行 Dograh 自带的部署脚本并验证结果。
+>
+> ```text
+> # 在 Claude Code 中
+> /plugin marketplace add dograh-hq/dograh-plugins
+> /plugin install dograh@dograh
+> ```
+>
+> 然后开启一个新会话,让它 _"set up Dograh"_(或运行 `/dograh-setup`)。Codex 同样支持——详见[插件仓库](https://github.com/dograh-hq/dograh-plugins#install)。
+
 > **提示**
 > 首次启动需要 2-3 分钟拉取所有镜像。启动完成后,打开 http://localhost:3010 即可创建你的第一个 AI 语音助手!
-> 常见问题及解决方案请参见 🔧 **[故障排查](docs/troubleshooting.md)**。
+> 常见问题及解决方案请参见 🔧 **[故障排查](docs/getting-started/troubleshooting.mdx)**。

 ### 🎙️ 你的第一个语音机器人

@ -144,7 +156,7 @@ curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/m
 - **GitHub Discussions** —— 分享使用场景、提问、交流工作流配方。
 - **GitHub Issues** —— 报告 bug 或提交功能请求。

-👉 加入我们 → [Dograh 社区 Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ)
+👉 加入我们 → [Dograh 社区 Slack](https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g)

 ## 🙌 参与贡献

@ -178,5 +190,5 @@ Dograh AI 基于 [BSD 2-Clause 协议](LICENSE)开源 —— 与构建 Dograh AI
  <p align="center">
    <a href="https://github.com/dograh-hq/dograh/stargazers">⭐ 给我们一个 Star</a> |
    <a href="https://app.dograh.com">☁️ 试用云端版本</a> |
-    <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3czr47sw5-MSg1J0kJ7IMPOCHF~03auQ">💬 加入 Slack</a>
+    <a href="https://join.slack.com/t/dograh-community/shared_invite/zt-3zjb5vwvl-j7hRz3_F1SOn5cH~jm5f5g">💬 加入 Slack</a>
  </p>
--- a/api/.env.example
+++ b/api/.env.example
@ -12,12 +12,24 @@ UI_APP_URL="http://localhost:3000"
 DATABASE_URL="postgresql+asyncpg://postgres:postgres@localhost:5432/postgres"
 REDIS_URL="redis://:redissecret@localhost:6379"

+# Internal devops secret for deployment scripts and lifecycle hooks.
+# scripts/rolling_update.sh sends this to protected operational endpoints via
+# X-Dograh-Devops-Secret. Use a unique random value in production.
+DOGRAH_DEVOPS_SECRET="change-me-dograh-devops-secret"
+
 # AWS S3 Configuration
 ENABLE_AWS_S3="false"
 # AWS_ACCESS_KEY_ID=""
 # AWS_SECRET_ACCESS_KEY=""
 # S3_BUCKET=""
 # S3_REGION=""
+# --- S3-compatible servers (MinIO, rustfs, Ceph, ...) ---
+# Use the S3 backend (ENABLE_AWS_S3=true) against a non-AWS, S3-compatible
+# server by overriding the endpoint and signing. Unlike the MinIO backend, the
+# S3 backend emits real presigned URLs, so the bucket can stay private.
+# S3_ENDPOINT_URL=""        # e.g. https://s3.example.com (blank = AWS default)
+# S3_SIGNATURE_VERSION=""   # blank = botocore default; set "s3v4" if the server requires SigV4
+# S3_ADDRESSING_STYLE=""    # blank = auto; set "path" if the server / TLS cert requires path-style

 # MinIO Configuration if using containerised MinIO instead of
 # AWS S3
--- a/api/.env.test.example
+++ b/api/.env.test.example
@ -0,0 +1,19 @@
+# Test environment. Read by pytest runs and the "Tests: API" launch
+# configurations in .vscode/launch.json.
+#
+# Tests target a separate database (`test_db`) so they don't clobber dev
+# data. Create it once after the postgres container is up:
+#   docker compose -f docker-compose-local.yaml exec postgres \
+#       createdb -U postgres test_db
+
+ENVIRONMENT="test"
+LOG_LEVEL="DEBUG"
+
+UI_APP_URL=http://localhost:3000
+
+DATABASE_URL="postgresql+asyncpg://postgres:postgres@localhost:5432/test_db"
+REDIS_URL="redis://:redissecret@localhost:6379/0"
+
+DOGRAH_DEVOPS_SECRET="test-dograh-devops-secret"
+
+MINIO_PUBLIC_ENDPOINT=http://localhost:9000
--- a/api/Dockerfile
+++ b/api/Dockerfile
@ -1,41 +1,53 @@
+# syntax=docker/dockerfile:1
 # Multi-stage Dockerfile
-# Stage 1: Builder - Install Python dependencies
-FROM python:3.12-slim AS builder
+# Stage 1: Builder - Install Python dependencies into a venv via uv
+# (mirrors .devcontainer/Dockerfile's venv-builder stage).
+FROM python:3.13-slim AS builder

 WORKDIR /app

-# Install git in builder stage (needed for pip install from git)
+# Install git in builder stage (needed for any pip install from git URLs)
 RUN apt-get update && apt-get install -y \
    git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

-# Copy and install requirements
-COPY api/requirements.txt .
+# uv (https://github.com/astral-sh/uv) for ~5-10x faster installs than pip.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/

-# Install dependencies to user directory for easy copying
-RUN pip install --user --no-cache-dir -r requirements.txt && \
-    # Clean up pip cache after installation
-    rm -rf /root/.cache/pip
+# Build the venv at the path it will live at in the final image, so shebangs
+# and console-scripts inside the venv reference the correct runtime location
+# after COPY --from.
+ENV VIRTUAL_ENV=/opt/venv \
+    PATH=/opt/venv/bin:$PATH
+RUN python -m venv "$VIRTUAL_ENV"

-# Copy and install pipecat from local submodule
-COPY pipecat /tmp/pipecat
-RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter,camb,mcp]' && \
-    # Swap opencv-python (pulled by pipecat[webrtc]) for opencv-python-headless
-    # to drop X11/Qt dependencies that otherwise require libxcb etc. in runner.
-    pip uninstall -y opencv-python && \
-    pip install --user --no-cache-dir opencv-python-headless && \
-    # Pre-download NLTK punkt_tab tokenizer data (required by pipecat at runtime)
-    python -c "import nltk; nltk.download('punkt_tab', quiet=True)" && \
-    # Clean up pip cache and temporary pipecat directory
-    rm -rf /root/.cache/pip /tmp/pipecat
+# Layer 1: API deps. Cache invalidates only when requirements.txt changes.
+RUN --mount=type=bind,source=api/requirements.txt,target=/tmp/req.txt \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install -r /tmp/req.txt

-# Strip cache files, test/example dirs, and type stubs from installed packages
-RUN find /root/.local -type f -name '*.pyc' -delete && \
-    find /root/.local -type d -name '__pycache__' -prune -exec rm -rf {} + && \
-    find /root/.local -type f -name '*.pyo' -delete && \
-    find /root/.local -type d \( -name tests -o -name test -o -name examples \) -prune -exec rm -rf {} + && \
-    find /root/.local -name '*.pyi' -delete
+# Layer 2: pipecat deps. Cache invalidates when pipecat source changes.
+# After installing pipecat, two hardening tweaks:
+#   1. Swap opencv-python (pulled by pipecat[webrtc]) for opencv-python-headless.
+#      The non-headless build links against X11/Qt (libxcb*); without those
+#      shared libs in the image, `import cv2` fails at runtime.
+#   2. Pre-download NLTK's punkt_tab tokenizer so pipecat's text processing
+#      doesn't hit the network on first agent run. NLTK auto-finds it under
+#      sys.prefix/nltk_data, so it travels with the venv on COPY.
+RUN --mount=type=bind,source=pipecat,target=/tmp/pipecat,rw \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter,camb,mcp,inworld,smallest]' \
+ && uv pip uninstall opencv-python \
+ && uv pip install opencv-python-headless \
+ && python -c "import nltk; nltk.download('punkt_tab', download_dir='/opt/venv/nltk_data', quiet=True)"
+
+# Strip cache files, test/example dirs, and type stubs from the venv
+RUN find /opt/venv -type f -name '*.pyc' -delete && \
+    find /opt/venv -type d -name '__pycache__' -prune -exec rm -rf {} + && \
+    find /opt/venv -type f -name '*.pyo' -delete && \
+    find /opt/venv -type d \( -name tests -o -name test -o -name examples \) -prune -exec rm -rf {} + && \
+    find /opt/venv -name '*.pyi' -delete

 # Stage 2: Node deps for ts_validator (built with full node:22-slim, only
 # node_modules is copied into the runner).
@ -46,62 +58,108 @@ RUN npm ci --omit=dev && npm cache clean --force

 # Stage 3: Static ffmpeg binary (avoids apt ffmpeg pulling mesa/libllvm for
 # hardware acceleration we don't use server-side).
+#
+# Source: BtbN/FFmpeg-Builds, served from GitHub's release-assets CDN (fast,
+# highly available, multi-arch). We pin a specific build for reproducibility,
+# but to a *month-end* autobuild tag — not a daily one. BtbN prunes daily
+# autobuilds after ~2 weeks (the previous pin was a daily tag and started
+# 404ing once GC'd), but keeps one month-end snapshot per month long-term
+# (~2 years back). A dated tag's assets are immutable, so the per-arch sha256
+# below never rots: builds stay reproducible AND integrity-verified.
+#
+# To upgrade ffmpeg: bump BTBN_TAG + BTBN_REV to a newer month-end autobuild
+# and refresh the two sha256s. No download needed — read tag, revision and
+# per-asset sha256 straight from the GitHub release-asset metadata:
+#   gh api repos/BtbN/FFmpeg-Builds/releases/tags/<tag> \
+#     --jq '.assets[] | select(.name|test("(linux64|linuxarm64)-gpl\\.tar\\.xz$")) | "\(.name) \(.digest)"'
+#
+# `--speed-limit/--speed-time` aborts a transfer *stalled* at <1KB/s for 30s
+# without killing a slow-but-progressing one; `--max-time` is a hard backstop;
+# `--retry` rides out CDN hiccups. Steps are `;`-separated because `set -e`
+# ignores failures in non-final `&&` terms. Binaries nest under bin/: use `find`.
 FROM debian:trixie-slim AS ffmpeg-static
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        curl ca-certificates xz-utils \
-    && curl -fsSL -o /tmp/ffmpeg.tar.xz https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
-    && mkdir -p /tmp/ffmpeg \
-    && tar -xJf /tmp/ffmpeg.tar.xz -C /tmp/ffmpeg --strip-components=1 \
-    && mv /tmp/ffmpeg/ffmpeg /tmp/ffmpeg/ffprobe /usr/local/bin/ \
-    && chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe
+ARG TARGETARCH
+ARG BTBN_TAG=autobuild-2026-05-31-13-22
+ARG BTBN_REV=N-124714-g49a77d37be
+RUN set -eu ; \
+    apt-get update ; \
+    apt-get install -y --no-install-recommends curl ca-certificates xz-utils ; \
+    rm -rf /var/lib/apt/lists/* ; \
+    case "${TARGETARCH}" in \
+      amd64) btbn_arch=linux64 ; \
+             sha256=ee052121296e6479325e09c6097d48e72a4af472d18c2b94388b5405dcde6cce ;; \
+      arm64) btbn_arch=linuxarm64 ; \
+             sha256=e97545305043794cdf7b698d713e29291464e0c35bb8e0f3ff1f62e4c56eedd6 ;; \
+      *) echo "unsupported TARGETARCH: ${TARGETARCH}" >&2 ; exit 1 ;; \
+    esac ; \
+    url="https://github.com/BtbN/FFmpeg-Builds/releases/download/${BTBN_TAG}/ffmpeg-${BTBN_REV}-${btbn_arch}-gpl.tar.xz" ; \
+    mkdir -p /tmp/ffmpeg ; cd /tmp/ffmpeg ; \
+    echo "Downloading ffmpeg (${BTBN_TAG}) from ${url}" ; \
+    curl -fsSL --connect-timeout 20 --speed-limit 1024 --speed-time 30 \
+         --max-time 600 --retry 3 --retry-delay 5 --retry-all-errors \
+         -o ffmpeg.tar.xz "${url}" ; \
+    echo "${sha256}  ffmpeg.tar.xz" | sha256sum -c - ; \
+    tar -xJf ffmpeg.tar.xz ; \
+    ffmpeg_bin="$(find /tmp/ffmpeg -type f -name ffmpeg | head -n1)" ; \
+    ffprobe_bin="$(find /tmp/ffmpeg -type f -name ffprobe | head -n1)" ; \
+    [ -n "${ffmpeg_bin}" ] && [ -n "${ffprobe_bin}" ] || { echo "ffmpeg/ffprobe not found in archive" >&2 ; exit 1 ; } ; \
+    mv "${ffmpeg_bin}" "${ffprobe_bin}" /usr/local/bin/ ; \
+    chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe ; \
+    rm -rf /tmp/ffmpeg

 # Stage 4: Runtime - Minimal image with only runtime dependencies
-FROM python:3.12-slim AS runner
+FROM python:3.13-slim AS runner

 WORKDIR /app

+RUN groupadd --system dograh \
+ && useradd --system --gid dograh --no-log-init --home-dir /app --shell /usr/sbin/nologin dograh \
+ && chown dograh:dograh /app
+
 # Static ffmpeg + ffprobe (used by audio_converter, audio_file_cache, etc.)
 COPY --from=ffmpeg-static /usr/local/bin/ffmpeg /usr/local/bin/ffmpeg
 COPY --from=ffmpeg-static /usr/local/bin/ffprobe /usr/local/bin/ffprobe

 # Node.js 22 binary only (ts_validator subprocess needs node >=22.6 for
-# native TypeScript stripping; see api/mcp_server/ts_bridge.py). python:3.12-slim
+# native TypeScript stripping; see api/mcp_server/ts_bridge.py). python:3.13-slim
 # already provides libstdc++6, libgcc-s1, and ca-certificates that node needs.
 COPY --from=node:22-slim /usr/local/bin/node /usr/local/bin/node

-# Copy Python packages from builder stage
-COPY --from=builder /root/.local /root/.local
+# Copy the populated venv from the builder stage. NLTK data lives at
+# /opt/venv/nltk_data and is auto-discovered via sys.prefix.
+COPY --from=builder /opt/venv /opt/venv

-# Copy NLTK data (punkt_tab tokenizer) from builder stage
-COPY --from=builder /root/nltk_data /root/nltk_data
-
-# Make sure scripts in .local are available
-ENV PATH=/root/.local/bin:$PATH
+# Activate the venv for subsequent RUN/CMD layers.
+ENV VIRTUAL_ENV=/opt/venv \
+    PATH=/opt/venv/bin:$PATH

 # Set Python to not generate .pyc files in runtime
 ENV PYTHONDONTWRITEBYTECODE=1
 # Unbuffered output for better container logging
 ENV PYTHONUNBUFFERED=1

-# Copy application code
-COPY ./api ./api
-COPY ./scripts/start_services_docker.sh ./scripts/start_services_docker.sh
+# Copy application code (chown at copy-time avoids a duplicate /app layer
+# from a later `RUN chown -R`, which would double the on-disk size of /app).
+COPY --chown=dograh:dograh ./api ./api
+COPY --chown=dograh:dograh ./scripts/start_services_docker.sh ./scripts/start_services_docker.sh

 # ts_validator Node deps (built in ts-deps stage with full node:22-slim image).
 # The validator runs as a short-lived subprocess from api/mcp_server/ts_bridge.py.
-COPY --from=ts-deps /ts_validator/node_modules ./api/mcp_server/ts_validator/node_modules
+COPY --from=ts-deps --chown=dograh:dograh /ts_validator/node_modules ./api/mcp_server/ts_validator/node_modules

 # Product documentation — read at runtime by the MCP docs tools
 # (search_dograh_docs / fetch_dograh_doc) so agents can learn Dograh.
-COPY ./docs ./docs
+COPY --chown=dograh:dograh ./docs ./docs

 ENV PYTHONPATH=/app

 # Disable file logging in Docker - logs go to stdout for docker logs
 ENV LOG_TO_FILE=false

+USER dograh
+
 # Expose the port FastAPI will run on
 EXPOSE 8000

 # Run the FastAPI app with uvicorn
-CMD ["./scripts/start_services_docker.sh"]
+CMD ["./scripts/start_services_docker.sh"]
--- a/api/alembic/versions/2159d4ac431a_added_quota_tables.py
+++ b/api/alembic/versions/2159d4ac431a_added_quota_tables.py
@ -18,6 +18,9 @@ branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None


+DEPRECATED_QUOTA_COMMENT = "Deprecated. MPS owns quota and credit ledger state."
+
+
 def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    # 1) Create the `quota_type` enum *before* we add the column that references it.
@ -34,7 +37,12 @@ def upgrade() -> None:
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("period_start", sa.DateTime(), nullable=False),
        sa.Column("period_end", sa.DateTime(), nullable=False),
-        sa.Column("quota_dograh_tokens", sa.Integer(), nullable=False),
+        sa.Column(
+            "quota_dograh_tokens",
+            sa.Integer(),
+            nullable=False,
+            comment=DEPRECATED_QUOTA_COMMENT,
+        ),
        sa.Column("used_dograh_tokens", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
@ -63,7 +71,11 @@ def upgrade() -> None:
    op.add_column(
        "organizations",
        sa.Column(
-            "quota_type", quota_type_enum, nullable=False, server_default="monthly"
+            "quota_type",
+            quota_type_enum,
+            nullable=False,
+            server_default="monthly",
+            comment=DEPRECATED_QUOTA_COMMENT,
        ),
    )
    op.add_column(
@ -73,6 +85,7 @@ def upgrade() -> None:
            sa.Integer(),
            nullable=False,
            server_default=sa.text("0"),
+            comment=DEPRECATED_QUOTA_COMMENT,
        ),
    )
    op.add_column(
@ -82,10 +95,17 @@ def upgrade() -> None:
            sa.Integer(),
            nullable=False,
            server_default=sa.text("LEAST(EXTRACT(DAY FROM CURRENT_DATE)::int, 28)"),
+            comment=DEPRECATED_QUOTA_COMMENT,
        ),
    )
    op.add_column(
-        "organizations", sa.Column("quota_start_date", sa.DateTime(), nullable=True)
+        "organizations",
+        sa.Column(
+            "quota_start_date",
+            sa.DateTime(),
+            nullable=True,
+            comment=DEPRECATED_QUOTA_COMMENT,
+        ),
    )
    op.add_column(
        "organizations",
@ -94,6 +114,7 @@ def upgrade() -> None:
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
+            comment=DEPRECATED_QUOTA_COMMENT,
        ),
    )
    # ### end Alembic commands ###
--- a/api/alembic/versions/384be6596b36_make_email_case_insensitive.py
+++ b/api/alembic/versions/384be6596b36_make_email_case_insensitive.py
@ -0,0 +1,42 @@
+"""make email case insensitive
+
+Revision ID: 384be6596b36
+Revises: 6bd9f67ec994
+Create Date: 2026-06-02 07:58:00.002359
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "384be6596b36"
+down_revision: Union[str, None] = "6bd9f67ec994"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Make user email uniqueness case-insensitive.
+
+    Drops the ``ix_users_email`` index and creates ``ix_users_email_lower``,
+    a unique index on ``lower(email)`` restricted to rows where
+    ``email IS NOT NULL``.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f("ix_users_email"), table_name="users")
+    # NOTE(review): building this unique index will presumably fail if existing
+    # rows hold emails that differ only by case — confirm data is deduplicated.
+    op.create_index(
+        "ix_users_email_lower",
+        "users",
+        [sa.literal_column("lower(email)")],
+        unique=True,
+        postgresql_where=sa.text("email IS NOT NULL"),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Restore the unique index on the raw ``email`` column.
+
+    Drops ``ix_users_email_lower`` and recreates ``ix_users_email`` as a
+    unique index on ``email``.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(
+        "ix_users_email_lower",
+        table_name="users",
+        postgresql_where=sa.text("email IS NOT NULL"),
+    )
+    op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True)
+    # ### end Alembic commands ###
--- a/api/alembic/versions/91cc6ba3e1c7_add_key_to_user_configurations.py
+++ b/api/alembic/versions/91cc6ba3e1c7_add_key_to_user_configurations.py
@ -0,0 +1,52 @@
+"""add key to user_configurations
+
+Turns user_configurations into a per-user keyed JSON store mirroring
+organization_configurations. Existing rows (the legacy v1 AI model
+configuration blob) are backfilled with key MODEL_CONFIGURATION.
+
+Revision ID: 91cc6ba3e1c7
+Revises: efe356f488f9
+Create Date: 2026-06-12 21:04:25.561529
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "91cc6ba3e1c7"
+down_revision: Union[str, None] = "efe356f488f9"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add a NOT NULL ``key`` column to user_configurations.
+
+    Also adds the ``_user_configuration_key_uc`` unique constraint on
+    ``(user_id, key)`` and then removes the column's server default.
+    """
+    # Backfill existing rows (all legacy model-config blobs) via the server
+    # default, then drop the default — application code always supplies key.
+    op.add_column(
+        "user_configurations",
+        sa.Column(
+            "key",
+            sa.String(),
+            nullable=False,
+            server_default="MODEL_CONFIGURATION",
+        ),
+    )
+
+    # NOTE(review): every existing row now has the same key, so this presumably
+    # fails if any user_id already has more than one row — confirm beforehand.
+    op.create_unique_constraint(
+        "_user_configuration_key_uc", "user_configurations", ["user_id", "key"]
+    )
+    op.alter_column("user_configurations", "key", server_default=None)
+
+
+def downgrade() -> None:
+    """Revert user_configurations to the schema without a ``key`` column.
+
+    Drops the unique constraint, deletes every row whose key is not
+    MODEL_CONFIGURATION, then drops the ``key`` column. The deletion is
+    destructive: nothing in this migration restores those rows.
+    """
+    op.drop_constraint(
+        "_user_configuration_key_uc", "user_configurations", type_="unique"
+    )
+    # Non-model-config rows (e.g. ONBOARDING) have no meaning in the old
+    # single-blob schema; the old code would read them as the user's model
+    # config, so they must not survive the downgrade.
+    op.execute("DELETE FROM user_configurations WHERE key != 'MODEL_CONFIGURATION'")
+    op.drop_column("user_configurations", "key")
--- a/api/alembic/versions/c425d3445750_add_columns_in_usage_table.py
+++ b/api/alembic/versions/c425d3445750_add_columns_in_usage_table.py
@ -18,6 +18,9 @@ branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None


+DEPRECATED_QUOTA_COMMENT = "Deprecated. MPS owns quota and credit ledger state."
+
+
 def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
@ -26,7 +29,12 @@ def upgrade() -> None:
    )
    op.add_column(
        "organization_usage_cycles",
-        sa.Column("quota_amount_usd", sa.Float(), nullable=True),
+        sa.Column(
+            "quota_amount_usd",
+            sa.Float(),
+            nullable=True,
+            comment=DEPRECATED_QUOTA_COMMENT,
+        ),
    )
    # ### end Alembic commands ###

--- a/api/alembic/versions/efe356f488f9_add_extra_column_in_workflow_runs.py
+++ b/api/alembic/versions/efe356f488f9_add_extra_column_in_workflow_runs.py
@ -0,0 +1,34 @@
+"""add extra column in workflow runs
+
+Revision ID: efe356f488f9
+Revises: 384be6596b36
+Create Date: 2026-06-16 12:24:30.081058
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "efe356f488f9"
+down_revision: Union[str, None] = "384be6596b36"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add a NOT NULL JSON column ``extra`` to workflow_runs."""
+    # The server default ('{}'::json) is what populates the column for rows
+    # that already exist, so the NOT NULL constraint can be applied directly.
+    op.add_column(
+        "workflow_runs",
+        sa.Column(
+            "extra",
+            sa.JSON(),
+            server_default=sa.text("'{}'::json"),
+            nullable=False,
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``extra`` column from workflow_runs, discarding its contents."""
+    op.drop_column("workflow_runs", "extra")
--- a/api/app.py
+++ b/api/app.py
@ -2,7 +2,12 @@

 import sentry_sdk

-from api.constants import DEPLOYMENT_MODE, ENABLE_TELEMETRY, SENTRY_DSN
+from api.constants import (
+    CORS_ALLOWED_ORIGINS,
+    DEPLOYMENT_MODE,
+    ENABLE_TELEMETRY,
+    SENTRY_DSN,
+)
 from api.logging_config import ENVIRONMENT, setup_logging

 # Set up logging and get the listener for cleanup
@ -83,15 +88,44 @@ app = FastAPI(
 )


-# Configure CORS
+# Configure CORS.
+# OSS is typically deployed with UI and API behind a single reverse proxy
+# (same-origin, so CORS does not apply). Keep it permissive without
+# credentials — wildcard + credentials is rejected by browsers and unsafe.
+# SaaS deployments must set CORS_ALLOWED_ORIGINS to an explicit allowlist.
+if DEPLOYMENT_MODE == "oss":
+    cors_origins: list[str] = ["*"]
+    cors_allow_credentials = False
+else:
+    if not CORS_ALLOWED_ORIGINS:
+        raise RuntimeError(
+            "CORS_ALLOWED_ORIGINS must be set to an explicit origin allowlist "
+            "when DEPLOYMENT_MODE != 'oss'"
+        )
+    if "*" in CORS_ALLOWED_ORIGINS:
+        raise RuntimeError(
+            "CORS_ALLOWED_ORIGINS cannot contain '*' with credentialed requests"
+        )
+    cors_origins = CORS_ALLOWED_ORIGINS
+    cors_allow_credentials = True
+
 app.add_middleware(
    CORSMiddleware,
-    allow_origins=["*"],  # Allows all origins
-    allow_credentials=True,
-    allow_methods=["*"],  # Allows all methods
-    allow_headers=["*"],  # Allows all headers
+    allow_origins=cors_origins,
+    allow_credentials=cors_allow_credentials,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )

+
+def _add_public_embed_cors_middleware() -> None:
+    """Register PublicEmbedCORSMiddleware on ``app`` with ``api_prefix=API_PREFIX``."""
+    # Imported inside the function rather than at module top — presumably to
+    # avoid an import cycle with api.routes; TODO confirm.
+    from api.routes.public_embed import PublicEmbedCORSMiddleware
+
+    # NOTE(review): this is added after CORSMiddleware, so under Starlette's
+    # ordering it should see requests first — confirm that is the intent.
+    app.add_middleware(PublicEmbedCORSMiddleware, api_prefix=API_PREFIX)
+
+
+_add_public_embed_cors_middleware()
+
 api_router = APIRouter()

 # include subrouters here
--- a/api/conftest.py
+++ b/api/conftest.py
@ -30,6 +30,11 @@ import sys
 import loguru
 import pytest

+REPO_ROOT = Path(__file__).resolve().parents[1]
+SDK_PY_SRC = REPO_ROOT / "sdk" / "python" / "src"
+if str(SDK_PY_SRC) not in sys.path:
+    sys.path.insert(0, str(SDK_PY_SRC))
+
 from api.constants import APP_ROOT_DIR  # noqa: E402


--- a/api/constants.py
+++ b/api/constants.py
@ -19,23 +19,54 @@ LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
 LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")

 # URLs for deployment
-BACKEND_API_ENDPOINT = os.getenv("BACKEND_API_ENDPOINT", "http://localhost:8000")
+#
+# PUBLIC_BASE_URL is the single canonical origin a deployment is reached at
+# (scheme + host, e.g. https://203-0-113-10.sslip.io). For a standard single-host
+# install it is the only endpoint value an operator sets — the per-subsystem URLs
+# below derive from it (and from PUBLIC_HOST for the TURN/ICE host). Each derived
+# var can still be set explicitly to override it for a split deployment.
+PUBLIC_BASE_URL = os.getenv("PUBLIC_BASE_URL") or None
+PUBLIC_HOST = os.getenv("PUBLIC_HOST") or None
+
+# Public URL the backend builds webhook/callback/embed links from. Derives from
+# PUBLIC_BASE_URL (public IP / domain), falling back to localhost for local dev.
+# When this is a non-public address (localhost or a private/reserved IP) the host
+# isn't reachable from the internet, so get_backend_endpoints() resolves a running
+# Cloudflare tunnel's URL at runtime instead (see api/utils/common.py).
+BACKEND_API_ENDPOINT = (
+    os.getenv("BACKEND_API_ENDPOINT") or PUBLIC_BASE_URL or "http://localhost:8000"
+)
 UI_APP_URL = os.getenv("UI_APP_URL", "http://localhost:3010")

 DATABASE_URL = os.environ["DATABASE_URL"]
 REDIS_URL = os.environ["REDIS_URL"]

 DEPLOYMENT_MODE = os.getenv("DEPLOYMENT_MODE", "oss")
+CORS_ALLOWED_ORIGINS = [
+    o.strip() for o in os.getenv("CORS_ALLOWED_ORIGINS", "").split(",") if o.strip()
+]
 AUTH_PROVIDER = os.getenv("AUTH_PROVIDER", "local")
+# Stack Auth public client config. These are safe to expose to the browser (the
+# publishable client key is public by design, and the project id is non-sensitive),
+# and are served to the UI at runtime via /api/v1/health so the frontend no longer
+# needs them baked into the bundle at build time.
+STACK_AUTH_PROJECT_ID = os.getenv("STACK_AUTH_PROJECT_ID")
+STACK_PUBLISHABLE_CLIENT_KEY = os.getenv("STACK_PUBLISHABLE_CLIENT_KEY")
 DOGRAH_MPS_SECRET_KEY = os.getenv("DOGRAH_MPS_SECRET_KEY", None)
 MPS_API_URL = os.getenv("MPS_API_URL", "https://services.dograh.com")
+DOGRAH_DEVOPS_SECRET = os.getenv("DOGRAH_DEVOPS_SECRET") or None

 # Storage Configuration
 ENABLE_AWS_S3 = os.getenv("ENABLE_AWS_S3", "false").lower() == "true"

 # MinIO Configuration
 MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000")
-MINIO_PUBLIC_ENDPOINT = os.getenv("MINIO_PUBLIC_ENDPOINT")
+# Full URL (scheme + host) browsers use to reach object storage. Derives from
+# PUBLIC_BASE_URL (remote nginx proxies /voice-audio/ to MinIO); set explicitly
+# only to point object storage at a separate origin.
+MINIO_PUBLIC_ENDPOINT = (
+    os.getenv("MINIO_PUBLIC_ENDPOINT") or PUBLIC_BASE_URL or "http://localhost:9000"
+)
 MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
 MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "minioadmin")
 MINIO_BUCKET = os.getenv("MINIO_BUCKET", "voice-audio")
@ -44,6 +75,17 @@ MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
 # AWS S3 Configuration
 S3_BUCKET = os.environ.get("S3_BUCKET")
 S3_REGION = os.environ.get("S3_REGION", "us-east-1")
+# Optional overrides for S3-compatible backends (e.g. MinIO, rustfs, Ceph).
+# S3_ENDPOINT_URL: full URL of a custom S3 endpoint (e.g. "https://s3.example.com").
+#   Leave unset to use AWS's default endpoint resolution.
+# S3_SIGNATURE_VERSION: botocore signature version used to sign requests and
+#   presigned URLs. Defaults to None (botocore's default, currently SigV2 for
+#   presigned URLs). Set to "s3v4" for S3-compatible servers that require SigV4.
+# S3_ADDRESSING_STYLE: "auto" (default), "path", or "virtual". Many S3-compatible
+#   servers and TLS setups require "path".
+S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL")
+S3_SIGNATURE_VERSION = os.environ.get("S3_SIGNATURE_VERSION")
+S3_ADDRESSING_STYLE = os.environ.get("S3_ADDRESSING_STYLE")

 # Sentry configuration
 SENTRY_DSN = os.getenv("SENTRY_DSN")
@ -64,7 +106,7 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
 LOG_ROTATION_SIZE = os.getenv("LOG_ROTATION_SIZE", "100 MB")
 LOG_RETENTION = os.getenv("LOG_RETENTION", "7 days")
 LOG_COMPRESSION = os.getenv("LOG_COMPRESSION", "gz")
-ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true"
+ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "true").lower() == "true"


 def _get_version() -> str:
@ -129,7 +171,9 @@ DEFAULT_CIRCUIT_BREAKER_CONFIG = {


 TURN_SECRET = os.getenv("TURN_SECRET")
-TURN_HOST = os.getenv("TURN_HOST", "localhost")
+# Host browsers dial for TURN/ICE. Derives from PUBLIC_HOST; set explicitly only
+# when the TURN server runs on a separate host from the app.
+TURN_HOST = os.getenv("TURN_HOST") or PUBLIC_HOST or "localhost"
 TURN_PORT = int(os.getenv("TURN_PORT", "3478"))
 TURN_TLS_PORT = int(os.getenv("TURN_TLS_PORT", "5349"))
 TURN_CREDENTIAL_TTL = int(os.getenv("TURN_CREDENTIAL_TTL", "86400"))
--- a/api/db/campaign_client.py
+++ b/api/db/campaign_client.py
@ -2,13 +2,15 @@ import json
 from datetime import UTC, datetime
 from typing import Any, Dict, List, Optional

-from sqlalchemy import func, text
+from sqlalchemy import func, text, update
 from sqlalchemy.future import select

 from api.db.base_client import BaseDBClient
 from api.db.filters import apply_workflow_run_filters, get_workflow_run_order_clause
 from api.db.models import CampaignModel, QueuedRunModel, WorkflowRunModel
 from api.schemas.workflow import WorkflowRunResponseSchema
+from api.services.workflow.run_usage_response import format_public_cost_info
+from api.utils.recording_artifacts import get_recording_storage_key


 class CampaignClient(BaseDBClient):
@ -44,9 +46,11 @@ class CampaignClient(BaseDBClient):
                source_id=source_id,
                created_by=user_id,
                organization_id=organization_id,
-                retry_config=retry_config
-                if retry_config
-                else CampaignModel.retry_config.default.arg,
+                retry_config=(
+                    retry_config
+                    if retry_config
+                    else CampaignModel.retry_config.default.arg
+                ),
                orchestrator_metadata=orchestrator_metadata,
                telephony_configuration_id=telephony_configuration_id,
            )
@ -215,26 +219,15 @@ class CampaignClient(BaseDBClient):
                        "is_completed": run.is_completed,
                        "recording_url": run.recording_url,
                        "transcript_url": run.transcript_url,
-                        "cost_info": {
-                            "dograh_token_usage": (
-                                run.cost_info.get("dograh_token_usage")
-                                if run.cost_info
-                                and "dograh_token_usage" in run.cost_info
-                                else round(
-                                    float(run.cost_info.get("total_cost_usd", 0)) * 100,
-                                    2,
-                                )
-                                if run.cost_info and "total_cost_usd" in run.cost_info
-                                else 0
-                            ),
-                            "call_duration_seconds": int(
-                                round(run.cost_info.get("call_duration_seconds") or 0)
-                            )
-                            if run.cost_info
-                            else None,
-                        }
-                        if run.cost_info
-                        else None,
+                        "user_recording_url": get_recording_storage_key(
+                            run.extra, "user"
+                        ),
+                        "bot_recording_url": get_recording_storage_key(
+                            run.extra, "bot"
+                        ),
+                        "cost_info": format_public_cost_info(
+                            run.cost_info, run.usage_info
+                        ),
                        "definition_id": run.definition_id,
                        "initial_context": run.initial_context,
                        "gathered_context": run.gathered_context,
@ -286,9 +279,11 @@ class CampaignClient(BaseDBClient):
                source_id=parent_campaign.source_id,
                created_by=parent_campaign.created_by,
                organization_id=parent_campaign.organization_id,
-                retry_config=retry_config
-                if retry_config
-                else CampaignModel.retry_config.default.arg,
+                retry_config=(
+                    retry_config
+                    if retry_config
+                    else CampaignModel.retry_config.default.arg
+                ),
                orchestrator_metadata=child_meta,
                rate_limit_per_second=parent_campaign.rate_limit_per_second,
                total_rows=len(queued_runs_data),
@ -354,8 +349,7 @@ class CampaignClient(BaseDBClient):
        # Retries create new queued_runs with suffixed source_uuids linked via
        # parent_queued_run_id, so group by the ROOT queued_run using a
        # recursive walk and pick the latest workflow_run across the tree.
-        sql = text(
-            f"""
+        sql = text(f"""
            WITH RECURSIVE run_tree AS (
                SELECT id AS root_id, id AS run_id
                FROM queued_runs
@ -382,8 +376,7 @@ class CampaignClient(BaseDBClient):
            JOIN latest_run_per_root lr ON lr.root_id = q0.id
            WHERE q0.campaign_id = :cid
              AND ({tag_filter})
-            """
-        )
+            """)

        async with self.async_session() as session:
            result = await session.execute(sql, {"cid": campaign_id})
@ -466,6 +459,63 @@ class CampaignClient(BaseDBClient):
                await session.rollback()
                raise

+    async def increment_campaign_metadata_counter(
+        self, campaign_id: int, key: str
+    ) -> int:
+        """Atomically increment an integer field in campaign orchestrator_metadata.
+
+        The read-modify-write happens inside a single UPDATE statement, which
+        also sets ``updated_at`` to the current UTC time.
+
+        Args:
+            campaign_id: ``campaigns.id`` of the row to update.
+            key: Name of the counter field inside ``orchestrator_metadata``.
+
+        Returns:
+            The counter value after the increment (1 on the first call for a
+            key that was absent).
+
+        Raises:
+            sqlalchemy.exc.NoResultFound: If no campaign row matches
+                ``campaign_id`` (raised by ``scalar_one()``).
+        """
+        async with self.async_session() as session:
+            # A NULL metadata column is treated as {} and a missing key as 0.
+            # The column is cast to jsonb for the `||` merge and back to json
+            # for storage.
+            result = await session.execute(
+                text(
+                    "UPDATE campaigns "
+                    "SET orchestrator_metadata = ("
+                    "        COALESCE(orchestrator_metadata::jsonb, '{}'::jsonb) "
+                    "        || jsonb_build_object("
+                    "            :key, "
+                    "            COALESCE((orchestrator_metadata::jsonb ->> :key)::int, 0) + 1"
+                    "        )"
+                    "    )::json, "
+                    "    updated_at = :now "
+                    "WHERE id = :campaign_id "
+                    "RETURNING (orchestrator_metadata::jsonb ->> :key)::int"
+                ),
+                {
+                    "campaign_id": campaign_id,
+                    "key": key,
+                    "now": datetime.now(UTC),
+                },
+            )
+            # Read the RETURNING value before committing; scalar_one() raises
+            # if the UPDATE matched no row.
+            attempt = result.scalar_one()
+            try:
+                await session.commit()
+            except Exception:
+                await session.rollback()
+                raise
+            return attempt
+
+    async def reset_campaign_metadata_counter(self, campaign_id: int, key: str) -> None:
+        """Remove a counter field from campaign orchestrator_metadata.
+
+        Also sets ``updated_at`` to the current UTC time. The UPDATE result is
+        not inspected, so a ``campaign_id`` that matches no row is a silent
+        no-op.
+
+        Args:
+            campaign_id: ``campaigns.id`` of the row to update.
+            key: Name of the field to delete from ``orchestrator_metadata``.
+        """
+        async with self.async_session() as session:
+            # A NULL metadata column is treated as {}; the jsonb `-` operator
+            # removes the key, and the result is cast back to json for storage.
+            await session.execute(
+                text(
+                    "UPDATE campaigns "
+                    "SET orchestrator_metadata = ("
+                    "        COALESCE(orchestrator_metadata::jsonb, '{}'::jsonb) - :key"
+                    "    )::json, "
+                    "    updated_at = :now "
+                    "WHERE id = :campaign_id"
+                ),
+                {
+                    "campaign_id": campaign_id,
+                    "key": key,
+                    "now": datetime.now(UTC),
+                },
+            )
+            try:
+                await session.commit()
+            except Exception:
+                await session.rollback()
+                raise
+
    # QueuedRun methods
    async def bulk_create_queued_runs(self, queued_runs_data: list[dict]) -> None:
        """Bulk create queued runs"""
@ -501,6 +551,35 @@ class CampaignClient(BaseDBClient):
            await session.refresh(queued_run)
            return queued_run

+    async def return_processing_queued_runs_without_workflow(
+        self, queued_run_ids: list[int]
+    ) -> int:
+        """Return claimed queued_runs to queued if no workflow was created for them.
+
+        Only rows that are in ``queued_run_ids``, currently in state
+        ``"processing"``, and have no WorkflowRunModel row pointing at them
+        are updated.
+
+        Args:
+            queued_run_ids: IDs of the queued runs to consider. An empty list
+                returns 0 without touching the database.
+
+        Returns:
+            The number of rows set back to ``"queued"`` (0 when the driver
+            reports no rowcount).
+        """
+        if not queued_run_ids:
+            return 0
+
+        # EXISTS subquery: is there a workflow run whose queued_run_id is this
+        # queued run's id?
+        workflow_exists = (
+            select(WorkflowRunModel.id)
+            .where(WorkflowRunModel.queued_run_id == QueuedRunModel.id)
+            .exists()
+        )
+        async with self.async_session() as session:
+            result = await session.execute(
+                update(QueuedRunModel)
+                .where(
+                    QueuedRunModel.id.in_(queued_run_ids),
+                    QueuedRunModel.state == "processing",
+                    ~workflow_exists,
+                )
+                .values(state="queued")
+            )
+            try:
+                await session.commit()
+            except Exception:
+                await session.rollback()
+                raise
+            return result.rowcount or 0
+
    async def count_queued_runs(
        self, campaign_id: int, state: Optional[str] = None
    ) -> int:
@ -576,7 +655,7 @@ class CampaignClient(BaseDBClient):
        async with self.async_session() as session:
            conditions = [
                WorkflowRunModel.is_completed.is_(True),
-                WorkflowRunModel.cost_info["call_duration_seconds"]
+                WorkflowRunModel.usage_info["call_duration_seconds"]
                .as_string()
                .isnot(None),
            ]
@ -599,6 +678,7 @@ class CampaignClient(BaseDBClient):
                    WorkflowRunModel.initial_context,
                    WorkflowRunModel.gathered_context,
                    WorkflowRunModel.cost_info,
+                    WorkflowRunModel.usage_info,
                    WorkflowRunModel.public_access_token,
                )
                .where(*conditions)
--- a/api/db/db_client.py
+++ b/api/db/db_client.py
@ -53,7 +53,7 @@ class DBClient(
    - UserClient: handles user and user configuration operations
    - OrganizationClient: handles organization operations
    - OrganizationConfigurationClient: handles organization configuration operations
-    - OrganizationUsageClient: handles organization usage and quota operations
+    - OrganizationUsageClient: handles organization usage reporting aggregates
    - IntegrationClient: handles integration operations
    - WorkflowTemplateClient: handles workflow template operations
    - CampaignClient: handles campaign operations
--- a/api/db/filters.py
+++ b/api/db/filters.py
@ -25,7 +25,7 @@ def get_workflow_run_order_clause(
    """
    # Determine sort column
    if sort_by == "duration":
-        sort_column = WorkflowRunModel.cost_info.op("->>")(
+        sort_column = WorkflowRunModel.usage_info.op("->>")(
            "call_duration_seconds"
        ).cast(Float)
    else:
@ -43,7 +43,7 @@ def get_workflow_run_order_clause(
 ATTRIBUTE_FIELD_MAPPING = {
    "dateRange": "created_at",
    "dispositionCode": "gathered_context.mapped_call_disposition",
-    "duration": "cost_info.call_duration_seconds",
+    "duration": "usage_info.call_duration_seconds",
    "status": "is_completed",
    "tokenUsage": "cost_info.total_cost_usd",
    "runId": "id",
@ -208,7 +208,7 @@ def apply_workflow_run_filters(
                min_val = value.get("min")
                max_val = value.get("max")

-                if field == "cost_info.call_duration_seconds":
+                if field == "usage_info.call_duration_seconds":
                    # Use ->> operator for compatibility with all PostgreSQL versions
                    # (subscript [] only works in PostgreSQL 14+)
                    duration_text = cast(WorkflowRunModel.usage_info, JSONB).op("->>")(
--- a/api/db/knowledge_base_client.py
+++ b/api/db/knowledge_base_client.py
@ -5,7 +5,7 @@ from pathlib import Path
 from typing import List, Optional

 from loguru import logger
-from sqlalchemy import select
+from sqlalchemy import delete, select
 from sqlalchemy.orm import selectinload

 from api.db.base_client import BaseDBClient
@ -300,6 +300,31 @@ class KnowledgeBaseClient(BaseDBClient):
            logger.info(f"Created {len(chunks)} chunks")
            return chunks

+    async def replace_chunks_for_document(
+        self,
+        document_id: int,
+        organization_id: int,
+        chunks: List[KnowledgeBaseChunkModel],
+    ) -> List[KnowledgeBaseChunkModel]:
+        """Replace all chunks for a document with a new precomputed batch.
+
+        Existing chunks matching both ``document_id`` and ``organization_id``
+        are deleted and the new chunks are added in the same session, then
+        committed together.
+
+        Args:
+            document_id: Document whose existing chunks are deleted.
+            organization_id: Organization the deleted chunks must belong to.
+            chunks: New chunk rows to insert.
+
+        Returns:
+            The same ``chunks`` list, with each instance refreshed from the
+            database after the commit.
+        """
+        async with self.async_session() as session:
+            await session.execute(
+                delete(KnowledgeBaseChunkModel).where(
+                    KnowledgeBaseChunkModel.document_id == document_id,
+                    KnowledgeBaseChunkModel.organization_id == organization_id,
+                )
+            )
+            # NOTE(review): the new chunks are inserted as given; nothing here
+            # checks that they carry this document_id / organization_id —
+            # presumably the caller sets them. Verify against callers.
+            session.add_all(chunks)
+            await session.commit()
+
+            # Reload each instance so attributes populated by the database are
+            # available to the caller.
+            for chunk in chunks:
+                await session.refresh(chunk)
+
+            logger.info(
+                f"Replaced chunks for document {document_id}: {len(chunks)} chunks"
+            )
+            return chunks
+
    async def get_chunks_for_document(
        self,
        document_id: int,
--- a/api/db/models.py
+++ b/api/db/models.py
@ -17,6 +17,7 @@ from sqlalchemy import (
    Text,
    UniqueConstraint,
    and_,
+    func,
    text,
 )
 from sqlalchemy.orm import declarative_base, relationship
@ -67,17 +68,38 @@ class UserModel(Base):
        back_populates="users",
    )
    is_superuser = Column(Boolean, default=False)
-    email = Column(String, unique=True, index=True, nullable=True)
+    email = Column(String, nullable=True)
    password_hash = Column(String, nullable=True)

+    __table_args__ = (
+        Index(
+            "ix_users_email_lower",
+            func.lower(email),
+            unique=True,
+            postgresql_where=text("email IS NOT NULL"),
+        ),
+    )
+

 class UserConfigurationModel(Base):
+    """Per-user keyed JSON store, mirroring organization_configurations.
+
+    Keys are defined in UserConfigurationKey. The legacy v1 AI model
+    configuration lives under MODEL_CONFIGURATION; last_validated_at is only
+    meaningful for that key.
+    """
+
    __tablename__ = "user_configurations"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    key = Column(String, nullable=False)
    configuration = Column(JSON, nullable=False, default=dict)
    last_validated_at = Column(DateTime(timezone=True), nullable=True)

+    __table_args__ = (
+        UniqueConstraint("user_id", "key", name="_user_configuration_key_uc"),
+    )
+

 # New Organization model
 class OrganizationModel(Base):
@ -87,22 +109,44 @@ class OrganizationModel(Base):
    provider_id = Column(String, unique=True, index=True, nullable=False)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC))

-    # Quota fields
+    # Deprecated: MPS owns quota and credit ledger state.
    quota_type = Column(
        Enum("monthly", "annual", name="quota_type"),
        nullable=False,
        default="monthly",
        server_default=text("'monthly'::quota_type"),
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
    )
    quota_dograh_tokens = Column(
-        Integer, nullable=False, default=0, server_default=text("0")
+        Integer,
+        nullable=False,
+        default=0,
+        server_default=text("0"),
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
    )
    quota_reset_day = Column(
-        Integer, nullable=False, default=1, server_default=text("1")
-    )  # 1-28, only for monthly
-    quota_start_date = Column(DateTime(timezone=True), nullable=True)  # Only for annual
+        Integer,
+        nullable=False,
+        default=1,
+        server_default=text("1"),
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
+    )
+    quota_start_date = Column(
+        DateTime(timezone=True),
+        nullable=True,
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
+    )
    quota_enabled = Column(
-        Boolean, nullable=False, default=False, server_default=text("false")
+        Boolean,
+        nullable=False,
+        default=False,
+        server_default=text("false"),
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
    )

    price_per_second_usd = Column(Float, nullable=True)
@ -500,6 +544,9 @@ class WorkflowRunModel(Base):
    is_completed = Column(Boolean, default=False)
    recording_url = Column(String, nullable=True)
    transcript_url = Column(String, nullable=True)
+    extra = Column(
+        JSON, nullable=False, default=dict, server_default=text("'{}'::json")
+    )
    # Store storage backend as string enum (s3, minio)
    storage_backend = Column(
        Enum("s3", "minio", name="storage_backend"),
@ -583,8 +630,9 @@ class WorkflowRunTextSessionModel(Base):

 class OrganizationUsageCycleModel(Base):
    """
-    This model is used to track the usage of Dograh tokens for an organization for a given usage
-    cycle.
+    This model is used to track reporting aggregates for an organization for a given
+    usage cycle. Quota fields on this model are deprecated; MPS owns quota and
+    credit ledger state.
    """

    __tablename__ = "organization_usage_cycles"
@ -593,14 +641,24 @@ class OrganizationUsageCycleModel(Base):
    organization_id = Column(Integer, ForeignKey("organizations.id"), nullable=False)
    period_start = Column(DateTime(timezone=True), nullable=False)
    period_end = Column(DateTime(timezone=True), nullable=False)
-    quota_dograh_tokens = Column(Integer, nullable=False)
+    quota_dograh_tokens = Column(
+        Integer,
+        nullable=False,
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
+    )
    used_dograh_tokens = Column(Float, nullable=False, default=0)
    total_duration_seconds = Column(
        Integer, nullable=False, default=0, server_default=text("0")
    )
    # New USD tracking fields
    used_amount_usd = Column(Float, nullable=True, default=0)
-    quota_amount_usd = Column(Float, nullable=True)
+    quota_amount_usd = Column(
+        Float,
+        nullable=True,
+        comment="Deprecated. MPS owns quota and credit ledger state.",
+        info={"deprecated": True},
+    )
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
    updated_at = Column(
        DateTime(timezone=True),
--- a/api/db/organization_usage_client.py
+++ b/api/db/organization_usage_client.py
@ -10,6 +10,7 @@ from sqlalchemy.orm import joinedload
 from api.db.base_client import BaseDBClient
 from api.db.filters import apply_workflow_run_filters
 from api.db.models import (
+    OrganizationConfigurationModel,
    OrganizationModel,
    OrganizationUsageCycleModel,
    UserConfigurationModel,
@ -17,11 +18,13 @@ from api.db.models import (
    WorkflowModel,
    WorkflowRunModel,
 )
-from api.schemas.user_configuration import UserConfiguration
+from api.enums import OrganizationConfigurationKey, UserConfigurationKey
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
+from api.utils.recording_artifacts import get_recording_storage_key


 class OrganizationUsageClient(BaseDBClient):
-    """Client for managing organization usage and quota operations."""
+    """Client for managing organization usage reporting aggregates."""

    async def get_or_create_current_cycle(
        self, organization_id: int, session=None
@ -47,14 +50,7 @@ class OrganizationUsageClient(BaseDBClient):
        self, organization_id: int, session, commit: bool
    ) -> OrganizationUsageCycleModel:
        """Internal implementation for get_or_create_current_cycle."""
-        # Get organization to determine quota type
-        org_result = await session.execute(
-            select(OrganizationModel).where(OrganizationModel.id == organization_id)
-        )
-        org = org_result.scalar_one()
-
-        # Calculate current period
-        period_start, period_end = self._calculate_current_period(org)
+        period_start, period_end = self._calculate_current_period()

        # Try to get existing cycle
        cycle_result = await session.execute(
@ -76,7 +72,8 @@ class OrganizationUsageClient(BaseDBClient):
            organization_id=organization_id,
            period_start=period_start,
            period_end=period_end,
-            quota_dograh_tokens=org.quota_dograh_tokens,
+            # Deprecated non-null column retained for historical schema compatibility.
+            quota_dograh_tokens=0,
        )
        # Handle concurrent inserts gracefully
        stmt = stmt.on_conflict_do_nothing(
@ -100,95 +97,9 @@ class OrganizationUsageClient(BaseDBClient):
        )
        return cycle_result.scalar_one()

-    async def check_and_reserve_quota(
-        self, organization_id: int, estimated_tokens: int = 0
-    ) -> bool:
-        """
-        Check if organization has sufficient quota and optionally reserve tokens.
-        Returns True if quota is available, False otherwise.
-
-        This method is fully atomic and safe for concurrent access from multiple processes.
-        """
-        async with self.async_session() as session:
-            # Get organization
-            org_result = await session.execute(
-                select(OrganizationModel).where(OrganizationModel.id == organization_id)
-            )
-            org = org_result.scalar_one_or_none()
-
-            if not org or not org.quota_enabled:
-                # No quota enforcement if not enabled
-                return True
-
-            # Get or create current cycle within the same session/transaction
-            cycle = await self._get_or_create_current_cycle_impl(
-                organization_id, session, commit=False
-            )
-
-            # Atomic check and update with row-level lock
-            result = await session.execute(
-                select(OrganizationUsageCycleModel)
-                .where(
-                    and_(
-                        OrganizationUsageCycleModel.id == cycle.id,
-                        OrganizationUsageCycleModel.used_dograh_tokens
-                        + estimated_tokens
-                        <= OrganizationUsageCycleModel.quota_dograh_tokens,
-                    )
-                )
-                .with_for_update(skip_locked=False)
-            )
-
-            cycle_locked = result.scalar_one_or_none()
-            if cycle_locked:
-                # Update the usage atomically
-                cycle_locked.used_dograh_tokens += estimated_tokens
-                await session.commit()
-                return True
-
-            return False
-
-    async def update_usage_after_run(
-        self,
-        organization_id: int,
-        actual_tokens: float,
-        duration_seconds: float = 0,
-        charge_usd: float | None = None,
-    ) -> None:
-        """Update usage after a workflow run completes with actual token count and duration.
-
-        This method is fully atomic and safe for concurrent access from multiple processes.
-        """
-        async with self.async_session() as session:
-            # Get or create current cycle within the same session/transaction
-            cycle = await self._get_or_create_current_cycle_impl(
-                organization_id, session, commit=False
-            )
-
-            # Acquire a row-level lock for atomic update
-            result = await session.execute(
-                select(OrganizationUsageCycleModel)
-                .where(OrganizationUsageCycleModel.id == cycle.id)
-                .with_for_update(skip_locked=False)
-            )
-            cycle_locked = result.scalar_one()
-
-            # Update usage atomically
-            cycle_locked.used_dograh_tokens += actual_tokens
-            cycle_locked.total_duration_seconds += int(round(duration_seconds))
-
-            # Update USD amount if provided
-            if charge_usd is not None:
-                if cycle_locked.used_amount_usd is None:
-                    cycle_locked.used_amount_usd = 0
-                cycle_locked.used_amount_usd += charge_usd
-
-            await session.commit()
-
    async def get_current_usage(self, organization_id: int) -> dict:
-        """Get current period usage information."""
+        """Get current reporting-period usage information."""
        async with self.async_session() as session:
-            # Get organization
            org_result = await session.execute(
                select(OrganizationModel).where(OrganizationModel.id == organization_id)
            )
@ -199,42 +110,19 @@ class OrganizationUsageClient(BaseDBClient):
                organization_id, session, commit=False
            )

-            # Calculate next refresh date
-            if org.quota_type == "monthly":
-                next_refresh = cycle.period_end + relativedelta(days=1)
-            else:  # annual
-                next_refresh = cycle.period_end + relativedelta(days=1)
-
            result = {
                "period_start": cycle.period_start.isoformat(),
                "period_end": cycle.period_end.isoformat(),
                "used_dograh_tokens": cycle.used_dograh_tokens,
-                "quota_dograh_tokens": cycle.quota_dograh_tokens,
-                "percentage_used": (
-                    round(
-                        (cycle.used_dograh_tokens / cycle.quota_dograh_tokens) * 100, 2
-                    )
-                    if cycle.quota_dograh_tokens > 0
-                    else 0
-                ),
-                "next_refresh_date": next_refresh.date().isoformat(),
-                "quota_enabled": org.quota_enabled,
                "total_duration_seconds": cycle.total_duration_seconds,
            }

            # Add USD fields if organization has pricing
            if org.price_per_second_usd is not None:
                result["used_amount_usd"] = cycle.used_amount_usd or 0
-                result["quota_amount_usd"] = cycle.quota_amount_usd
                result["currency"] = "USD"
                result["price_per_second_usd"] = org.price_per_second_usd

-                # Calculate percentage based on USD if available
-                if cycle.quota_amount_usd and cycle.quota_amount_usd > 0:
-                    result["percentage_used"] = round(
-                        ((cycle.used_amount_usd or 0) / cycle.quota_amount_usd) * 100, 2
-                    )
-
            return result

    async def get_usage_history(
@ -254,7 +142,7 @@ class OrganizationUsageClient(BaseDBClient):
                .join(UserModel, WorkflowModel.user_id == UserModel.id)
                .where(
                    UserModel.selected_organization_id == organization_id,
-                    WorkflowRunModel.cost_info.isnot(None),
+                    WorkflowRunModel.usage_info.isnot(None),
                )
                .order_by(WorkflowRunModel.created_at.desc())
            )
@ -307,19 +195,8 @@ class OrganizationUsageClient(BaseDBClient):
            total_tokens = 0
            total_duration_seconds = 0
            for run in runs:
-                if run.cost_info:
-                    # Try to get dograh_token_usage first (new format)
-                    dograh_tokens = run.cost_info.get("dograh_token_usage", 0)
-                    # If not present, calculate from total_cost_usd (old format)
-                    if dograh_tokens == 0 and "total_cost_usd" in run.cost_info:
-                        dograh_tokens = round(
-                            float(run.cost_info["total_cost_usd"]) * 100, 2
-                        )
-                    # Get call duration
-                    call_duration = run.cost_info.get("call_duration_seconds", 0)
-                else:
-                    dograh_tokens = 0
-                    call_duration = 0
+                dograh_tokens = 0
+                call_duration = (run.usage_info or {}).get("call_duration_seconds", 0)
                total_tokens += dograh_tokens
                total_duration_seconds += int(round(call_duration))

@ -350,6 +227,10 @@ class OrganizationUsageClient(BaseDBClient):
                    "call_duration_seconds": int(round(call_duration)),
                    "recording_url": run.recording_url,
                    "transcript_url": run.transcript_url,
+                    "user_recording_url": get_recording_storage_key(run.extra, "user"),
+                    "bot_recording_url": get_recording_storage_key(run.extra, "bot"),
+                    "extra": run.extra,
+                    "public_access_token": run.public_access_token,
                    "phone_number": phone_number,
                    "caller_number": caller_number,
                    "called_number": called_number,
@ -392,13 +273,14 @@ class OrganizationUsageClient(BaseDBClient):
                    WorkflowRunModel.initial_context,
                    WorkflowRunModel.gathered_context,
                    WorkflowRunModel.cost_info,
+                    WorkflowRunModel.usage_info,
                    WorkflowRunModel.public_access_token,
                )
                .join(WorkflowModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
                .join(UserModel, WorkflowModel.user_id == UserModel.id)
                .where(
                    UserModel.selected_organization_id == organization_id,
-                    WorkflowRunModel.cost_info.isnot(None),
+                    WorkflowRunModel.usage_info.isnot(None),
                )
                .order_by(WorkflowRunModel.created_at.desc())
            )
@ -439,21 +321,44 @@ class OrganizationUsageClient(BaseDBClient):
        """Get daily usage breakdown for an organization with pricing."""

        async with self.async_session() as session:
-            # Get user timezone if user_id is provided
+            # Get org timezone preference first, then fall back to legacy user config.
            user_timezone = "UTC"  # Default timezone
+            pref_result = await session.execute(
+                select(OrganizationConfigurationModel).where(
+                    OrganizationConfigurationModel.organization_id == organization_id,
+                    OrganizationConfigurationModel.key.in_(
+                        [
+                            OrganizationConfigurationKey.ORGANIZATION_PREFERENCES.value,
+                            OrganizationConfigurationKey.MODEL_CONFIGURATION_PREFERENCES.value,
+                        ]
+                    ),
+                )
+            )
+            pref_rows = pref_result.scalars().all()
+            pref_by_key = {pref.key: pref for pref in pref_rows}
+            pref_obj = pref_by_key.get(
+                OrganizationConfigurationKey.ORGANIZATION_PREFERENCES.value
+            ) or pref_by_key.get(
+                OrganizationConfigurationKey.MODEL_CONFIGURATION_PREFERENCES.value
+            )
+            if pref_obj and pref_obj.value:
+                user_timezone = pref_obj.value.get("timezone") or user_timezone
+
            if user_id:
                config_result = await session.execute(
                    select(UserConfigurationModel).where(
-                        UserConfigurationModel.user_id == user_id
+                        UserConfigurationModel.user_id == user_id,
+                        UserConfigurationModel.key
+                        == UserConfigurationKey.MODEL_CONFIGURATION.value,
                    )
                )
                config_obj = config_result.scalar_one_or_none()
                if config_obj and config_obj.configuration:
-                    user_config = UserConfiguration.model_validate(
+                    effective_config = EffectiveAIModelConfiguration.model_validate(
                        config_obj.configuration
                    )
-                    if user_config.timezone:
-                        user_timezone = user_config.timezone
+                    if effective_config.timezone and user_timezone == "UTC":
+                        user_timezone = effective_config.timezone

            # Validate timezone string
            try:
@ -472,7 +377,7 @@ class OrganizationUsageClient(BaseDBClient):
                select(
                    date_expr.label("date"),
                    func.sum(
-                        WorkflowRunModel.cost_info["call_duration_seconds"].as_float()
+                        WorkflowRunModel.usage_info["call_duration_seconds"].as_float()
                    ).label("total_seconds"),
                    func.count(WorkflowRunModel.id).label("call_count"),
                )
@ -521,83 +426,11 @@ class OrganizationUsageClient(BaseDBClient):
                "currency": "USD",
            }

-    async def update_organization_quota(
-        self,
-        organization_id: int,
-        quota_type: str,
-        quota_dograh_tokens: int,
-        quota_reset_day: Optional[int] = None,
-        quota_start_date: Optional[datetime] = None,
-    ) -> OrganizationModel:
-        """Update organization quota settings."""
-        async with self.async_session() as session:
-            result = await session.execute(
-                select(OrganizationModel).where(OrganizationModel.id == organization_id)
-            )
-            org = result.scalar_one()
-
-            org.quota_type = quota_type
-            org.quota_dograh_tokens = quota_dograh_tokens
-            org.quota_enabled = True
-
-            if quota_type == "monthly" and quota_reset_day:
-                org.quota_reset_day = quota_reset_day
-            elif quota_type == "annual" and quota_start_date:
-                org.quota_start_date = quota_start_date
-
-            await session.commit()
-            await session.refresh(org)
-            return org
-
-    def _calculate_current_period(
-        self, org: OrganizationModel
-    ) -> tuple[datetime, datetime]:
-        """Calculate the current billing period based on organization settings."""
+    def _calculate_current_period(self) -> tuple[datetime, datetime]:
+        """Calculate the current calendar-month reporting period."""
        now = datetime.now(timezone.utc)

-        if org.quota_type == "monthly":
-            # Find the start of the current billing month
-            reset_day = org.quota_reset_day
-
-            # Handle month boundaries
-            if now.day >= reset_day:
-                period_start = now.replace(
-                    day=reset_day, hour=0, minute=0, second=0, microsecond=0
-                )
-            else:
-                # Previous month
-                period_start = (now - relativedelta(months=1)).replace(
-                    day=reset_day, hour=0, minute=0, second=0, microsecond=0
-                )
-
-            # End is one month later minus 1 second
-            period_end = (
-                period_start + relativedelta(months=1) - relativedelta(seconds=1)
-            )
-
-        else:  # annual
-            if not org.quota_start_date:
-                # Default to calendar year
-                period_start = now.replace(
-                    month=1, day=1, hour=0, minute=0, second=0, microsecond=0
-                )
-                period_end = (
-                    period_start + relativedelta(years=1) - relativedelta(seconds=1)
-                )
-            else:
-                # Find current annual period
-                start_date = org.quota_start_date.replace(tzinfo=timezone.utc)
-                years_diff = now.year - start_date.year
-
-                # Adjust for whether we've passed the anniversary
-                if now.month < start_date.month or (
-                    now.month == start_date.month and now.day < start_date.day
-                ):
-                    years_diff -= 1
-
-                period_start = start_date + relativedelta(years=years_diff)
-                period_end = (
-                    period_start + relativedelta(years=1) - relativedelta(seconds=1)
-                )
+        period_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+        period_end = period_start + relativedelta(months=1) - relativedelta(seconds=1)

        return period_start, period_end
--- a/api/db/telephony_configuration_client.py
+++ b/api/db/telephony_configuration_client.py
@ -103,6 +103,30 @@ class TelephonyConfigurationClient(BaseDBClient):
            )
            return int(result.scalar() or 0)

+    async def count_vonage_configs_missing_signature_secret(
+        self, organization_id: int
+    ) -> int:
+        """Count this org's Vonage configs whose signature secret is absent.
+
+        A config counts when the `signature_secret` entry extracted from its
+        `credentials` with `->>` is NULL or the empty string.
+        """
+        # Build the text-extraction expression once; both checks reuse it.
+        secret_text = TelephonyConfigurationModel.credentials.op("->>")(
+            "signature_secret"
+        )
+        secret_absent = secret_text.is_(None) | (secret_text == "")
+        count_stmt = select(func.count(TelephonyConfigurationModel.id)).where(
+            TelephonyConfigurationModel.organization_id == organization_id,
+            TelephonyConfigurationModel.provider == "vonage",
+            secret_absent,
+        )
+        async with self.async_session() as session:
+            outcome = await session.execute(count_stmt)
+            tally = outcome.scalar()
+            # scalar() may yield None; report that as zero.
+            return int(tally or 0)
+
    async def list_all_telephony_configurations_by_provider(
        self, provider: str
    ) -> List[TelephonyConfigurationModel]:
--- a/api/db/user_client.py
+++ b/api/db/user_client.py
@ -3,11 +3,14 @@ from datetime import datetime, timezone

 from loguru import logger
 from pydantic import ValidationError
+from sqlalchemy import func
+from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.future import select

 from api.db.base_client import BaseDBClient
 from api.db.models import UserConfigurationModel, UserModel
-from api.schemas.user_configuration import UserConfiguration
+from api.enums import UserConfigurationKey
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration


 class UserClient(BaseDBClient):
@ -27,8 +30,6 @@ class UserClient(BaseDBClient):

            # Use PostgreSQL's INSERT ... ON CONFLICT DO NOTHING
            # This is atomic and handles race conditions at the database level
-            from sqlalchemy.dialects.postgresql import insert
-
            stmt = insert(UserModel.__table__).values(
                provider_id=provider_id,
                created_at=datetime.now(timezone.utc),
@ -64,19 +65,57 @@ class UserClient(BaseDBClient):
            )
            return result.scalars().first()

-    async def get_user_configurations(self, user_id: int) -> UserConfiguration:
-        async with self.async_session() as session:
-            result = await session.execute(
-                select(UserConfigurationModel).where(
-                    UserConfigurationModel.user_id == user_id
-                )
+    async def _get_user_configuration_row(
+        self, session, user_id: int, key: str
+    ) -> UserConfigurationModel | None:
+        """Return the first configuration row for (user_id, key), else None."""
+        lookup = select(UserConfigurationModel).where(
+            UserConfigurationModel.user_id == user_id,
+            UserConfigurationModel.key == key,
+        )
+        rows = await session.execute(lookup)
+        return rows.scalars().first()
+
+    async def get_user_configuration_value(self, user_id: int, key: str) -> dict | None:
+        """Return the `configuration` JSON of the user's `key` row, or None."""
+        async with self.async_session() as db:
+            hit = await self._get_user_configuration_row(db, user_id, key)
+            return None if not hit else hit.configuration
+
+    async def upsert_user_configuration_value(
+        self, user_id: int, key: str, value: dict
+    ) -> dict:
+        """Upsert `value` for (user_id, key) and return the stored configuration."""
+        fresh_row = insert(UserConfigurationModel.__table__).values(
+            user_id=user_id, key=key, configuration=value
+        )
+        # Single INSERT ... ON CONFLICT DO UPDATE, keyed on the named constraint.
+        upsert = fresh_row.on_conflict_do_update(
+            constraint="_user_configuration_key_uc",
+            set_={"configuration": fresh_row.excluded.configuration},
+        ).returning(UserConfigurationModel.configuration)
+        async with self.async_session() as session:
+            try:
+                outcome = await session.execute(upsert)
+                await session.commit()
+            except Exception:
+                # Undo the failed unit of work, then surface the original error.
+                await session.rollback()
+                raise
+            return outcome.scalar_one()
+
+    async def get_user_configurations(
+        self, user_id: int
+    ) -> EffectiveAIModelConfiguration:
+        async with self.async_session() as session:
+            configuration_obj = await self._get_user_configuration_row(
+                session, user_id, UserConfigurationKey.MODEL_CONFIGURATION.value
            )
-            configuration_obj = result.scalars().first()
            if not configuration_obj:
-                return UserConfiguration()
+                return EffectiveAIModelConfiguration()

            try:
-                return UserConfiguration.model_validate(
+                return EffectiveAIModelConfiguration.model_validate(
                    {
                        **configuration_obj.configuration,
                        "last_validated_at": configuration_obj.last_validated_at,
@ -89,41 +128,23 @@ class UserClient(BaseDBClient):
                    f"Failed to validate user configuration for user {user_id}: {e}. "
                    "Returning default configuration."
                )
-                return UserConfiguration()
+                return EffectiveAIModelConfiguration()

    async def update_user_configuration(
-        self, user_id: int, configuration: UserConfiguration
-    ) -> UserConfiguration:
-        async with self.async_session() as session:
-            result = await session.execute(
-                select(UserConfigurationModel).where(
-                    UserConfigurationModel.user_id == user_id
-                )
-            )
-            configuration_obj = result.scalars().first()
-            if not configuration_obj:
-                configuration_obj = UserConfigurationModel(
-                    user_id=user_id, configuration=configuration.model_dump()
-                )
-                session.add(configuration_obj)
-            else:
-                configuration_obj.configuration = configuration.model_dump()
-            try:
-                await session.commit()
-            except Exception as e:
-                await session.rollback()
-                raise e
-            await session.refresh(configuration_obj)
-        return UserConfiguration.model_validate(configuration_obj.configuration)
+        self, user_id: int, configuration: EffectiveAIModelConfiguration
+    ) -> EffectiveAIModelConfiguration:
+        value = await self.upsert_user_configuration_value(
+            user_id,
+            UserConfigurationKey.MODEL_CONFIGURATION.value,
+            configuration.model_dump(),
+        )
+        return EffectiveAIModelConfiguration.model_validate(value)

    async def update_user_configuration_last_validated_at(self, user_id: int) -> None:
        async with self.async_session() as session:
-            result = await session.execute(
-                select(UserConfigurationModel).where(
-                    UserConfigurationModel.user_id == user_id
-                )
+            configuration_obj = await self._get_user_configuration_row(
+                session, user_id, UserConfigurationKey.MODEL_CONFIGURATION.value
            )
-            configuration_obj = result.scalars().first()
            if not configuration_obj:
                raise ValueError(f"User configuration with ID {user_id} not found")
            configuration_obj.last_validated_at = datetime.now()
@ -161,15 +182,26 @@ class UserClient(BaseDBClient):
        async with self.async_session() as session:
            from sqlalchemy import update

-            stmt = update(UserModel).where(UserModel.id == user_id).values(email=email)
+            stmt = (
+                update(UserModel)
+                .where(UserModel.id == user_id)
+                .values(email=email.lower())
+            )
            await session.execute(stmt)
            await session.commit()

    async def get_user_by_email(self, email: str) -> UserModel | None:
-        """Fetch a user by their email address."""
+        """Fetch a user by their email address (case-insensitive).
+
+        Email addresses are case-insensitive in practice, so a user who
+        signed up as "User@example.com" must still be found when they later
+        log in as "user@example.com". Compare on lower(email) so lookups are
+        robust to capitalization differences across sign-in flows.
+        """
+        normalized_email = email.lower()
        async with self.async_session() as session:
            result = await session.execute(
-                select(UserModel).where(UserModel.email == email)
+                select(UserModel).where(func.lower(UserModel.email) == normalized_email)
            )
            return result.scalars().first()

@ -180,7 +212,7 @@ class UserClient(BaseDBClient):
        async with self.async_session() as session:
            user = UserModel(
                provider_id=f"oss_{int(datetime.now(timezone.utc).timestamp())}_{uuid.uuid4()}",
-                email=email,
+                email=email.lower(),
                password_hash=password_hash,
            )
            session.add(user)
--- a/api/db/workflow_run_client.py
+++ b/api/db/workflow_run_client.py
@ -16,6 +16,8 @@ from api.db.models import (
 )
 from api.enums import CallType, StorageBackend
 from api.schemas.workflow import WorkflowRunResponseSchema
+from api.services.workflow.run_usage_response import format_public_cost_info
+from api.utils.recording_artifacts import get_recording_storage_key


 class WorkflowRunClient(BaseDBClient):
@ -91,12 +93,17 @@ class WorkflowRunClient(BaseDBClient):
                else workflow.template_context_variables
            )

+            merged_initial_context = {
+                **(default_context or {}),
+                **(initial_context or {}),
+            }
+
            new_run = WorkflowRunModel(
                name=name,
                workflow=workflow,
                mode=mode,
                definition_id=target_def.id if target_def else None,
-                initial_context=initial_context or default_context,
+                initial_context=merged_initial_context,
                gathered_context=gathered_context or {},
                logs=logs or {},
                campaign_id=campaign_id,
@ -187,13 +194,19 @@ class WorkflowRunClient(BaseDBClient):
                        "workflow_name": run.workflow.name if run.workflow else None,
                        "user_id": run.workflow.user_id if run.workflow else None,
                        "organization_id": organization.id if organization else None,
-                        "organization_name": organization.provider_id
-                        if organization
-                        else None,
+                        "organization_name": (
+                            organization.provider_id if organization else None
+                        ),
                        "mode": run.mode,
                        "is_completed": run.is_completed,
                        "recording_url": run.recording_url,
                        "transcript_url": run.transcript_url,
+                        "user_recording_url": get_recording_storage_key(
+                            run.extra, "user"
+                        ),
+                        "bot_recording_url": get_recording_storage_key(
+                            run.extra, "bot"
+                        ),
                        "usage_info": run.usage_info,
                        "cost_info": run.cost_info,
                        "initial_context": run.initial_context,
@ -312,26 +325,15 @@ class WorkflowRunClient(BaseDBClient):
                        "is_completed": run.is_completed,
                        "recording_url": run.recording_url,
                        "transcript_url": run.transcript_url,
-                        "cost_info": {
-                            "dograh_token_usage": (
-                                run.cost_info.get("dograh_token_usage")
-                                if run.cost_info
-                                and "dograh_token_usage" in run.cost_info
-                                else round(
-                                    float(run.cost_info.get("total_cost_usd", 0)) * 100,
-                                    2,
-                                )
-                                if run.cost_info and "total_cost_usd" in run.cost_info
-                                else 0
-                            ),
-                            "call_duration_seconds": int(
-                                round(run.cost_info.get("call_duration_seconds") or 0)
-                            )
-                            if run.cost_info
-                            else None,
-                        }
-                        if run.cost_info
-                        else None,
+                        "user_recording_url": get_recording_storage_key(
+                            run.extra, "user"
+                        ),
+                        "bot_recording_url": get_recording_storage_key(
+                            run.extra, "bot"
+                        ),
+                        "cost_info": format_public_cost_info(
+                            run.cost_info, run.usage_info
+                        ),
                        "definition_id": run.definition_id,
                        "initial_context": run.initial_context,
                        "gathered_context": run.gathered_context,
@ -356,6 +358,7 @@ class WorkflowRunClient(BaseDBClient):
        logs: dict | None = None,
        state: str | None = None,
        annotations: dict | None = None,
+        extra: dict | None = None,
    ) -> WorkflowRunModel:
        async with self.async_session() as session:
            # Use SELECT FOR UPDATE to lock the row during the update
@ -378,7 +381,12 @@ class WorkflowRunClient(BaseDBClient):
            if cost_info:
                run.cost_info = cost_info
            if initial_context:
-                run.initial_context = initial_context
+                # Merge initial context patches so independent call-start/runtime
+                # writers do not erase keys stored earlier in the run lifecycle.
+                run.initial_context = {
+                    **(run.initial_context or {}),
+                    **initial_context,
+                }
            if gathered_context:
                # Lets merge the incoming gathered context keys with the existing ones
                run.gathered_context = {
@ -390,6 +398,8 @@ class WorkflowRunClient(BaseDBClient):
                run.logs = {**run.logs, **logs}
            if annotations:
                run.annotations = {**run.annotations, **annotations}
+            if extra:
+                run.extra = {**run.extra, **extra}
            if is_completed:
                run.is_completed = is_completed
            if state:
--- a/api/enums.py
+++ b/api/enums.py
@ -17,6 +17,32 @@ class CallType(Enum):
    OUTBOUND = "outbound"


+class TelephonyCallStatus(str, Enum):
+    """Call-status values for a telephony call.
+
+    Members also subclass ``str``, so each compares equal to its raw value.
+    """
+
+    INITIATED = "initiated"
+    RINGING = "ringing"
+    IN_PROGRESS = "in-progress"
+    ANSWERED = "answered"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    BUSY = "busy"
+    NO_ANSWER = "no-answer"
+    CANCELED = "canceled"
+    ERROR = "error"
+
+    @classmethod
+    def from_raw(cls, value: object) -> "TelephonyCallStatus | None":
+        """Coerce a raw status value into a member, or ``None`` if unknown.
+
+        Args:
+            value: An existing member, or any object whose ``str()`` form is
+                a status string. Matching ignores case and surrounding
+                whitespace.
+
+        Returns:
+            The matching member, or ``None`` when ``value`` is ``None``,
+            empty/blank, or not a recognized status.
+        """
+        if isinstance(value, cls):
+            return value
+
+        # Identity check instead of ``value in (None, "")`` so arbitrary
+        # objects never have their ``__eq__`` invoked here.
+        if value is None:
+            return None
+
+        # Strip before matching so a padded value (" completed\n") is not
+        # mistaken for an unknown status.
+        normalized = str(value).strip().lower()
+        if not normalized:
+            return None
+
+        try:
+            return cls(normalized)
+        except ValueError:
+            return None
+
+
 class WorkflowRunMode(Enum):
    ARI = "ari"
    PLIVO = "plivo"
@ -77,8 +103,6 @@ class WorkflowRunStatus(Enum):


 class OrganizationConfigurationKey(Enum):
-    DISPOSITION_CODE_MAPPING = "DISPOSITION_CODE_MAPPING"
-    DISPOSITION_MESSAGE_TEMPLATE = "DISPOSITION_MESSAGE_TEMPLATE"
    CONCURRENT_CALL_LIMIT = "CONCURRENT_CALL_LIMIT"
    TELEPHONY_CONFIGURATION = (
        "TELEPHONY_CONFIGURATION"  # Stores all providers + active one
@ -89,6 +113,20 @@ class OrganizationConfigurationKey(Enum):
    LANGFUSE_CREDENTIALS = (
        "LANGFUSE_CREDENTIALS"  # Org-level Langfuse tracing credentials
    )
+    MODEL_CONFIGURATION_V2 = (
+        "MODEL_CONFIGURATION_V2"  # Org-level v2 AI model configuration
+    )
+    ORGANIZATION_PREFERENCES = "ORGANIZATION_PREFERENCES"  # Org-level defaults such as timezone/test call number
+    MODEL_CONFIGURATION_PREFERENCES = "MODEL_CONFIGURATION_PREFERENCES"  # Deprecated; read fallback for old org preferences
+
+
+class UserConfigurationKey(Enum):
+    """Key names used in the per-user keyed JSON store (user_configurations)."""
+
+    # Legacy per-user v1 AI model configuration.
+    MODEL_CONFIGURATION = "MODEL_CONFIGURATION"
+    # Post-signup onboarding state (gate, tooltips, actions).
+    ONBOARDING = "ONBOARDING"


 class WorkflowStatus(Enum):
@ -160,3 +198,5 @@ class PostHogEvent(str, Enum):
    AGENT_EMBEDDED = "agent_embedded"
    SIGNED_UP = "signed_up"
    SIGNED_IN = "signed_in"
+    ORGANIZATION_CREATED = "organization_created"
+    ORGANIZATION_USER_ASSOCIATED = "organization_user_associated"
--- a/api/mcp_server/instructions.py
+++ b/api/mcp_server/instructions.py
@ -22,8 +22,25 @@ mistake the system has seen at least once.
 DOGRAH_MCP_INSTRUCTIONS = """\
 You build and edit Dograh voice-AI workflows by emitting TypeScript that uses the `@dograh/sdk` package. Workflows are stored as JSON; this server projects them to TypeScript for editing and parses them back on save.

+## Stages
+
+Every authoring session runs through three stages. Inject the right guidance at each by calling `get_voice_prompting_guide` before you write or revise prompts. Do not skip plan when creating; do not skip review when editing prompt-bearing fields.
+
+1. **Plan** — call `get_voice_prompting_guide` with `stage="plan"` first. Decide persona, ordered node list, edges, exit conditions, and tools/credentials needed. Enumerate available `list_node_types`, `list_tools`, `list_credentials`, `list_documents`, `list_recordings` as needed. Present a structured plan to the user and wait for confirmation before writing any code.
+
+2. **Create** — call `get_voice_prompting_guide` with `stage="create"` and (when applicable) `node_type=<type>` before writing each node type's prompts. Drill into specific topics via `get_voice_prompting_guide` with `topic=<id>` only when complexity warrants it. Then emit TypeScript and call `create_workflow` (new) or `save_workflow` (edit).
+
+3. **Review** — after a successful save, read any `tips[]` returned and surface them to the user with proposed fixes. Call `get_voice_prompting_guide` with `stage="review"` to enumerate review-time concerns (instruction collision, missing handoff cues, success-criteria gaps).
+
+The guide tool is the authoritative source for prompt-authoring craft (turn-taking, persona, readback, disfluencies). Product-mechanics questions (how a node type works at runtime, what `template_variables` resolve to) belong in `search_docs` / `read_doc` instead — don't conflate the two.
+
 ## Call order

+### Creating a reusable tool
+1. If authentication is needed, call `list_credentials` and use an existing `credential_uuid`; the user creates credential secrets in the UI.
+2. Build a typed tool definition and call `create_tool`. The request schema is authoritative for allowed tool categories and config fields.
+3. Use the returned `tool_uuid` in workflow node `tool_uuids`, then call `create_workflow` or `save_workflow`.
+
 ### Reading documentation
 1. `search_docs` — use first for keyword or acronym lookup when the user is asking how Dograh works or how to configure something.
 2. `read_doc` — fetch the full page once one result looks likely. Prefer this over reasoning from search summaries alone.
@ -33,14 +50,17 @@ You build and edit Dograh voice-AI workflows by emitting TypeScript that uses th
 1. `list_workflows` — locate the target workflow.
 2. `get_workflow_code` — fetch the current source for that workflow.
 3. (optional) `list_node_types` / `get_node_type` — consult before adding or editing a node type whose fields aren't already visible in the current code.
-4. Mutate the code in place. Preserve existing nodes, edges, and variable names unless the task requires removing or renaming them.
-5. `save_workflow` — persist as a new draft. The published version is untouched.
+4. (optional) `get_voice_prompting_guide` with `stage="create"` and `node_type=<type>` — call before revising any node's prompt field.
+5. Mutate the code in place. Preserve existing nodes, edges, and variable names unless the task requires removing or renaming them.
+6. `save_workflow` — persist as a new draft. The published version is untouched.

 ### Creating a new workflow
-1. Create a simple 1-node workflow with only `startCall`. The user can iteratively add complexity by editing it.
-2. `list_node_types` / `get_node_type` — consult to learn the fields available on the node types you intend to use.
-3. Author SDK TypeScript from scratch. The `new Workflow({ name: "..." })` call is required — `name` becomes the workflow's display name.
-4. `create_workflow` — persists a new workflow as version 1 (published). Returns the new `workflow_id`. For subsequent edits use `save_workflow` (which writes a draft).
+1. Run the plan stage (see above) before any code.
+2. Create a simple 1-node workflow with only `startCall` if the user just wants a starter. The user can iteratively add complexity by editing it.
+3. `list_node_types` / `get_node_type` — consult to learn the fields available on the node types you intend to use.
+4. `get_voice_prompting_guide` with `stage="create"` and `node_type=<type>` — call before writing each node's prompt.
+5. Author SDK TypeScript from scratch. The `new Workflow({ name: "..." })` call is required — `name` becomes the workflow's display name.
+6. `create_workflow` — persists a new workflow as version 1 (published). Returns the new `workflow_id`. For subsequent edits use `save_workflow` (which writes a draft).

 ## Allowed source shape

--- a/api/mcp_server/server.py
+++ b/api/mcp_server/server.py
@ -13,12 +13,15 @@ from api.mcp_server.tools.docs_search import list_docs, read_doc, search_docs
 from api.mcp_server.tools.get_workflow_code import get_workflow_code
 from api.mcp_server.tools.node_types import get_node_type, list_node_types
 from api.mcp_server.tools.save_workflow import save_workflow
+from api.mcp_server.tools.tool_creation import create_tool
+from api.mcp_server.tools.voice_prompting_guide import get_voice_prompting_guide
 from api.mcp_server.tools.workflows import get_workflow, list_workflows

 mcp = FastMCP("dograh", instructions=DOGRAH_MCP_INSTRUCTIONS)

 for _tool in (
    create_workflow,
+    create_tool,
    get_node_type,
    get_workflow,
    get_workflow_code,
@ -32,6 +35,15 @@ for _tool in (
 ):
    mcp.tool(_tool)

+# The voice-prompting guide tool is registered on its own, not through the
+# plain `mcp.tool(_tool)` loop, so that it can carry explicit MCP hints:
+# read-only, idempotent, non-destructive and closed-world.
+_GUIDE_TOOL_ANNOTATIONS = ToolAnnotations(
+    readOnlyHint=True,
+    idempotentHint=True,
+    destructiveHint=False,
+    openWorldHint=False,
+)
+
+mcp.tool(get_voice_prompting_guide, annotations=_GUIDE_TOOL_ANNOTATIONS)
+
 _DOCS_TOOL_ANNOTATIONS = ToolAnnotations(
    readOnlyHint=True,
    idempotentHint=True,
--- a/api/mcp_server/tools/tool_creation.py
+++ b/api/mcp_server/tools/tool_creation.py
@ -0,0 +1,63 @@
+"""MCP tool for creating reusable Dograh tools."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import ValidationError as PydanticValidationError
+
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.tracing import traced_tool
+from api.schemas.tool import CreateToolRequest
+from api.services.tool_management import ToolManagementError, create_tool_for_user
+
+
+def _error_result(code: str, message: str, **extra: Any) -> dict[str, Any]:
+    """Build the failure payload returned by the tool.
+
+    Keys passed through ``extra`` are merged last, so they override the
+    three base keys on a name clash.
+    """
+    payload: dict[str, Any] = {
+        "created": False,
+        "error_code": code,
+        "error": message,
+    }
+    payload.update(extra)
+    return payload
+
+
+@traced_tool
+async def create_tool(request: CreateToolRequest) -> dict[str, Any]:
+    """Create a reusable tool the agent can invoke during calls.
+
+    The request schema is the same `CreateToolRequest` used by the REST API
+    and generated SDKs. Use it to create HTTP API, end-call, transfer-call,
+    calculator, or MCP-server tools. For authenticated HTTP or MCP tools,
+    reference an existing `credential_uuid` from `list_credentials`; users
+    create credential secrets in the UI, and this flow only stores the UUID
+    reference. For MCP tools, the server best-effort discovers the remote
+    tool catalog and caches it in `definition.config.discovered_tools`.
+
+    On success, returns `created: true` and the new `tool_uuid`; use that
+    UUID in workflow node `tool_uuids`. On failure, returns `created: false`,
+    a machine-readable `error_code`, and a human-readable `error`. Possible
+    `error_code` values:
+    - `validation_error` — the request failed schema validation.
+    - `credential_not_found` — a supplied credential_uuid is not in this
+      organization; ask the user to create/select it in the UI first.
+    - `organization_required` — the API key user has no selected organization.
+    - `create_failed` — unexpected persistence or backend failure; retry once,
+      then surface the error.
+    """
+    # Authenticate first; nothing below runs for an unauthenticated caller.
+    caller = await authenticate_mcp_request()
+
+    # Validate the payload; schema problems are reported as a structured
+    # error result instead of propagating.
+    try:
+        validated = CreateToolRequest.model_validate(request)
+    except PydanticValidationError as exc:
+        return _error_result("validation_error", str(exc))
+
+    # Known service errors keep their own error code; anything else is
+    # reported under the generic "create_failed" code.
+    try:
+        created = await create_tool_for_user(validated, caller, source="mcp")
+    except ToolManagementError as exc:
+        return _error_result(exc.error_code, exc.message)
+    except Exception as exc:  # noqa: BLE001
+        return _error_result("create_failed", str(exc))
+
+    exposed_fields = ("tool_uuid", "name", "category", "status", "definition")
+    return {
+        "created": True,
+        **{field: getattr(created, field) for field in exposed_fields},
+    }
--- a/api/mcp_server/tools/voice_prompting_guide.py
+++ b/api/mcp_server/tools/voice_prompting_guide.py
@ -0,0 +1,105 @@
+"""MCP tool that surfaces voice-prompting guidance to the workflow-authoring LLM.
+
+The guide is split into stages (plan / create / review) and atoms
+(topics). Stage calls return a tight briefing — an intro plus a list of
+relevant topics with one-line lenses. Topic calls return the full
+reference content for one atom. No-arg calls return a flat index.
+
+The LLM is expected to read the briefing for the current stage first,
+then drill into specific topics only when complexity warrants it. The
+authoritative guidance lives in `api.services.voice_prompting_guide`;
+this tool is a thin MCP-facing projection.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from fastapi import HTTPException
+
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.tracing import traced_tool
+from api.services.voice_prompting_guide import (
+    Stage,
+    build_briefing,
+    get_topic,
+    list_topic_index,
+)
+
+
+@traced_tool
+async def get_voice_prompting_guide(
+    stage: Optional[str] = None,
+    topic: Optional[str] = None,
+    node_type: Optional[str] = None,
+) -> dict[str, Any]:
+    """Fetch staged voice-prompting guidance for authoring Dograh workflows.
+
+    Call this BEFORE composing or revising any prompt field on a node. The
+    guide is the authoritative source for prompt-authoring craft (turn-taking,
+    persona, readback rules, disfluencies); product-mechanics questions
+    (how a node type works at runtime) belong in `search_docs` / `read_doc`.
+
+    Args:
+        stage: "plan" | "create" | "review". Returns a stage briefing — a
+            short intro plus the list of topics relevant at this stage,
+            each with a one-line lens. Combine with `node_type` during the
+            create stage to narrow to topics that apply to that node type's
+            prompts (e.g. `node_type="agent"`).
+        topic: A topic id from a prior briefing. Returns the full content
+            for that atom. Use after the briefing flags a topic worth
+            drilling into. Mutually exclusive with `stage`.
+        node_type: Optional filter. Most useful with `stage="create"`.
+
+    Returns:
+        - With `topic`: { id, title, severity, content, stages_relevant,
+          applies_to_node_types?, cross_refs? }.
+        - With `stage`: { stage, intro, topics: [{id, title, lens}],
+          drill_in, filtered_to_node_type? }.
+        - With no args: { topics: [{id, title}], next }.
+
+    Briefings are designed to be cheap — read the lens, decide what to
+    drill into, then ask for full content for the 1–3 topics that matter
+    for the prompt you're about to write. Do not pull every topic.
+    """
+    # Authentication runs before any argument validation.
+    await authenticate_mcp_request()
+
+    # `topic` and `stage` select different response shapes, so reject both.
+    if topic is not None and stage is not None:
+        raise ValueError(
+            "Pass either `topic` or `stage`, not both. Use `stage` for a "
+            "briefing index; use `topic` for full content of one atom."
+        )
+
+    # Deep dive: full content for a single topic.
+    if topic is not None:
+        atom = get_topic(topic)
+        if atom is None:
+            # List the known ids so the caller can correct itself.
+            available = ", ".join(t["id"] for t in list_topic_index())
+            raise HTTPException(
+                status_code=404,
+                detail=(
+                    f"Unknown voice-prompting topic: {topic!r}. "
+                    f"Available topics: {available or '(none registered)'}."
+                ),
+            )
+        return atom.to_deep_dict()
+
+    # Stage briefing, optionally narrowed by node type.
+    if stage is not None:
+        try:
+            stage_enum = Stage(stage)
+        except ValueError:
+            # `from None` drops the enum's own ValueError from the chain; the
+            # HTTPException detail already carries everything the caller needs.
+            raise HTTPException(
+                status_code=400,
+                detail=(
+                    f"Unknown stage: {stage!r}. "
+                    f"Use one of: {', '.join(s.value for s in Stage)}."
+                ),
+            ) from None
+        return build_briefing(stage_enum, node_type=node_type)
+
+    # No arguments: flat topic index plus a hint about what to call next.
+    return {
+        "topics": list_topic_index(),
+        "next": (
+            "Call with stage='plan'|'create'|'review' for a briefing, or "
+            "topic=<id> for the full content of one atom."
+        ),
+    }
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -1,5 +1,5 @@
 [project]
 name = "dograh-api"
-version = "1.31.0"
+version = "1.39.0"
 description = "Backend API for Dograh voice AI platform"
-requires-python = ">=3.12"
+requires-python = ">=3.13,<3.14"
--- a/api/requirements.dev.txt
+++ b/api/requirements.dev.txt
@ -2,4 +2,3 @@ mypy==2.0.0
 watchfiles==1.1.1
 datamodel-code-generator==0.56.1
 twine==6.2.0
-e ./sdk/python
--- a/api/requirements.txt
+++ b/api/requirements.txt
@ -16,7 +16,7 @@ msgpack==1.1.2
 pgvector==0.4.2
 bcrypt==5.0.0
 email-validator==2.3.0
-posthog==7.11.1
+posthog==7.19.1
 fastmcp==3.2.4
 tuner-pipecat-sdk==0.2.0
 PyNaCl==1.6.2
--- a/api/routes/agent_stream.py
+++ b/api/routes/agent_stream.py
@ -22,7 +22,7 @@ from starlette.websockets import WebSocketDisconnect

 from api.db import db_client
 from api.enums import CallType, WorkflowRunState
-from api.services.quota_service import check_dograh_quota_by_user_id
+from api.services.quota_service import authorize_workflow_run_start
 from api.services.telephony import registry as telephony_registry

 router = APIRouter(prefix="/agent-stream")
@ -67,19 +67,6 @@ async def agent_stream_websocket(
        await websocket.close(code=1008, reason="Workflow not found")
        return

-    quota_result = await check_dograh_quota_by_user_id(
-        workflow.user_id, workflow_id=workflow.id
-    )
-    if not quota_result.has_quota:
-        logger.warning(
-            f"agent-stream quota exceeded for user {workflow.user_id}: "
-            f"{quota_result.error_message}"
-        )
-        await websocket.close(
-            code=1008, reason=quota_result.error_message or "Quota exceeded"
-        )
-        return
-
    numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000
    workflow_run_name = f"WR-AGS-{numeric_suffix:08d}"
    call_id = params.get("callId") or params.get("CallSid")
@ -108,6 +95,20 @@ async def agent_stream_websocket(
    set_current_run_id(workflow_run.id)
    set_current_org_id(workflow.organization_id)

+    quota_result = await authorize_workflow_run_start(
+        workflow_id=workflow.id,
+        workflow_run_id=workflow_run.id,
+    )
+    if not quota_result.has_quota:
+        logger.warning(
+            f"agent-stream quota exceeded for user {workflow.user_id}: "
+            f"{quota_result.error_message}"
+        )
+        await websocket.close(
+            code=1008, reason=quota_result.error_message or "Quota exceeded"
+        )
+        return
+
    await db_client.update_workflow_run(
        run_id=workflow_run.id, state=WorkflowRunState.RUNNING.value
    )
--- a/api/routes/auth.py
+++ b/api/routes/auth.py
@ -3,9 +3,12 @@ from loguru import logger

 from api.db import db_client
 from api.db.models import UserModel
-from api.enums import PostHogEvent
+from api.enums import OrganizationConfigurationKey, PostHogEvent
 from api.schemas.auth import AuthResponse, LoginRequest, SignupRequest, UserResponse
 from api.services.auth.depends import create_user_configuration_with_mps_key, get_user
+from api.services.configuration.ai_model_configuration import (
+    convert_legacy_ai_model_configuration_to_v2,
+)
 from api.services.posthog_client import capture_event
 from api.utils.auth import create_jwt_token, hash_password, verify_password

@ -47,6 +50,12 @@ async def signup(request: SignupRequest):
        )
        if mps_config:
            await db_client.update_user_configuration(user.id, mps_config)
+            model_config_v2 = convert_legacy_ai_model_configuration_to_v2(mps_config)
+            await db_client.upsert_configuration(
+                organization.id,
+                OrganizationConfigurationKey.MODEL_CONFIGURATION_V2.value,
+                model_config_v2.model_dump(mode="json", exclude_none=True),
+            )
    except Exception:
        logger.warning(
            "Failed to create default configuration for OSS user", exc_info=True
--- a/api/routes/campaign.py
+++ b/api/routes/campaign.py
@ -18,7 +18,7 @@ from api.services.auth.depends import get_user
 from api.services.campaign.runner import campaign_runner_service
 from api.services.campaign.source_sync import CampaignSourceSyncService
 from api.services.campaign.source_sync_factory import get_sync_service
-from api.services.quota_service import check_dograh_quota
+from api.services.quota_service import authorize_workflow_run_start
 from api.services.reports import generate_campaign_report_csv
 from api.services.storage import storage_fs

@ -375,7 +375,7 @@ async def create_campaign(
        if workflow_def:
            try:
                dto = ReactFlowDTO(**workflow_def)
-                graph = WorkflowGraph(dto)
+                graph = WorkflowGraph(dto, skip_instance_constraints_for={"trigger"})
                required_vars = graph.get_required_template_variables()

                if (
@ -550,7 +550,10 @@ async def start_campaign(

    # Check Dograh quota before starting campaign (apply per-workflow
    # model_overrides so we evaluate the keys this campaign will use).
-    quota_result = await check_dograh_quota(user, workflow_id=campaign.workflow_id)
+    quota_result = await authorize_workflow_run_start(
+        workflow_id=campaign.workflow_id,
+        actor_user=user,
+    )
    if not quota_result.has_quota:
        raise HTTPException(status_code=402, detail=quota_result.error_message)

@ -872,7 +875,10 @@ async def resume_campaign(

    # Check Dograh quota before resuming campaign (apply per-workflow
    # model_overrides so we evaluate the keys this campaign will use).
-    quota_result = await check_dograh_quota(user, workflow_id=campaign.workflow_id)
+    quota_result = await authorize_workflow_run_start(
+        workflow_id=campaign.workflow_id,
+        actor_user=user,
+    )
    if not quota_result.has_quota:
        raise HTTPException(status_code=402, detail=quota_result.error_message)

--- a/api/routes/knowledge_base.py
+++ b/api/routes/knowledge_base.py
@ -369,25 +369,51 @@ async def search_chunks(

    try:
        # Import here to avoid circular dependency
-        from api.services.gen_ai import OpenAIEmbeddingService
+        from api.services.configuration.ai_model_configuration import (
+            apply_managed_embeddings_base_url,
+            get_resolved_ai_model_configuration,
+        )
+        from api.services.gen_ai import build_embedding_service

        # Try to get user's embeddings configuration
-        user_config = await db_client.get_user_configurations(user.id)
+        resolved_config = await get_resolved_ai_model_configuration(
+            user_id=user.id,
+            organization_id=user.selected_organization_id,
+        )
+        effective_config = resolved_config.effective
        embeddings_api_key = None
        embeddings_model = None
+        embeddings_provider = None
+        embeddings_base_url = None
+        embeddings_endpoint = None
+        embeddings_api_version = None

-        if user_config.embeddings:
-            embeddings_api_key = user_config.embeddings.api_key
-            embeddings_model = user_config.embeddings.model
+        if effective_config.embeddings:
+            embeddings_api_key = effective_config.embeddings.api_key
+            embeddings_model = effective_config.embeddings.model
+            embeddings_provider = getattr(effective_config.embeddings, "provider", None)
+            embeddings_endpoint = getattr(effective_config.embeddings, "endpoint", None)
+            embeddings_base_url = apply_managed_embeddings_base_url(
+                provider=embeddings_provider,
+                base_url=getattr(effective_config.embeddings, "base_url", None),
+            )
+            embeddings_api_version = getattr(
+                effective_config.embeddings, "api_version", None
+            )

-        # Initialize embedding service with user config or fallback to env
-        embedding_service = OpenAIEmbeddingService(
+        # Manual search runs outside any workflow run, so resolve the MPS
+        # correlation id here (mint only for orgs already on v2; never create one).
+        embedding_service = await build_embedding_service(
            db_client=db_client,
+            provider=embeddings_provider,
            api_key=embeddings_api_key,
-            model_id=embeddings_model or "text-embedding-3-small",
-            base_url=getattr(user_config.embeddings, "base_url", None)
-            if user_config.embeddings
-            else None,
+            model=embeddings_model,
+            base_url=embeddings_base_url,
+            endpoint=embeddings_endpoint,
+            api_version=embeddings_api_version,
+            organization_id=user.selected_organization_id,
+            created_by=str(user.provider_id),
+            resolve_correlation=True,
        )

        # Perform search
--- a/api/routes/main.py
+++ b/api/routes/main.py
@ -1,4 +1,7 @@
-from fastapi import APIRouter
+import secrets
+from typing import Annotated
+
+from fastapi import APIRouter, Header, HTTPException, status
 from loguru import logger
 from pydantic import BaseModel

@ -68,10 +71,19 @@ class HealthResponse(BaseModel):
    status: str
    version: str
    backend_api_endpoint: str
+    # Public URL the deployment is reachable at when it sits behind a Cloudflare
+    # tunnel (the host has no public IP). null for a directly-reachable deployment.
+    # The UI shows this so operators know the URL telephony providers should call.
+    tunnel_url: str | None = None
    deployment_mode: str
    auth_provider: str
    turn_enabled: bool
    force_turn_relay: bool
+    # Public Stack Auth client config — only populated when auth_provider == "stack".
+    # The UI reads these at runtime to initialize Stack, so they no longer need to
+    # be baked into the browser bundle at build time. Both are public values.
+    stack_project_id: str | None = None
+    stack_publishable_client_key: str | None = None


@router.get("/health", response_model=HealthResponse)
@ -79,20 +91,88 @@ async def health() -> HealthResponse:
    from api.constants import (
        APP_VERSION,
        AUTH_PROVIDER,
+        BACKEND_API_ENDPOINT,
        DEPLOYMENT_MODE,
        FORCE_TURN_RELAY,
+        STACK_AUTH_PROJECT_ID,
+        STACK_PUBLISHABLE_CLIENT_KEY,
        TURN_SECRET,
    )
-    from api.utils.common import get_backend_endpoints
+    from api.utils.common import get_backend_endpoints, is_local_or_private_url

    logger.debug("Health endpoint called")
    backend_endpoint, _ = await get_backend_endpoints()
+    # tunnel_url is set only when a Cloudflare tunnel was actually resolved: the
+    # configured address isn't publicly reachable, but get_backend_endpoints found
+    # a public tunnel URL for it. This is the URL the UI shows for inbound webhooks.
+    # It stays null for a directly-reachable (public IP / domain) deployment, where
+    # backend_api_endpoint itself is the public URL.
+    tunnel_url = (
+        backend_endpoint
+        if is_local_or_private_url(BACKEND_API_ENDPOINT)
+        and not is_local_or_private_url(backend_endpoint)
+        else None
+    )
+    is_stack = AUTH_PROVIDER == "stack"
    return HealthResponse(
        status="ok",
        version=APP_VERSION,
-        backend_api_endpoint=backend_endpoint,
+        backend_api_endpoint=BACKEND_API_ENDPOINT,
+        tunnel_url=tunnel_url,
        deployment_mode=DEPLOYMENT_MODE,
        auth_provider=AUTH_PROVIDER,
        turn_enabled=bool(TURN_SECRET),
        force_turn_relay=FORCE_TURN_RELAY,
+        stack_project_id=STACK_AUTH_PROJECT_ID if is_stack else None,
+        stack_publishable_client_key=(
+            STACK_PUBLISHABLE_CLIENT_KEY if is_stack else None
+        ),
    )
+
+
+# Response body of GET /health/active-calls.
+class ActiveCallsResponse(BaseModel):
+    # Value returned by active_call_count() when the endpoint is called.
+    active_calls: int
+
+
+# Name of the request header that carries the shared devops secret checked by
+# the /health/active-calls endpoint.
+DOGRAH_DEVOPS_SECRET_HEADER = "X-Dograh-Devops-Secret"
+
+
+def _verify_devops_secret(
+    configured_secret: str | None,
+    provided_secret: str | None,
+) -> None:
+    """Reject the request unless the caller supplied the configured secret.
+
+    Args:
+        configured_secret: The secret configured for this deployment, or a
+            falsy value when none is set.
+        provided_secret: The secret supplied by the caller, if any.
+
+    Raises:
+        HTTPException: 503 when no secret is configured; 403 when the
+            provided secret is missing or does not match.
+    """
+    if not configured_secret:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Devops secret is not configured",
+        )
+    # Compare as bytes: compare_digest() raises TypeError for str arguments
+    # containing non-ASCII characters, which would surface as a 500 instead
+    # of a 403 for a crafted value (and make a non-ASCII secret unusable).
+    if not provided_secret or not secrets.compare_digest(
+        provided_secret.encode("utf-8"),
+        configured_secret.encode("utf-8"),
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Forbidden",
+        )
+
+
+@router.get("/health/active-calls", response_model=ActiveCallsResponse)
+async def active_calls(
+    x_dograh_devops_secret: Annotated[
+        str | None,
+        Header(alias=DOGRAH_DEVOPS_SECRET_HEADER),
+    ] = None,
+) -> ActiveCallsResponse:
+    """In-flight call count for THIS worker — the drain signal for deploys.
+
+    A deploy orchestrator polls this per worker and waits for zero before
+    sending SIGTERM, because uvicorn force-closes live call WebSockets (close
+    code 1012) on SIGTERM and would cut calls mid-conversation otherwise. The
+    count is per-process: one uvicorn per VM port (scripts/rolling_update.sh)
+    or per Kubernetes pod (preStop hook). See api/services/pipecat/active_calls.py.
+    """
+    from api.constants import DOGRAH_DEVOPS_SECRET
+    from api.services.pipecat.active_calls import active_call_count
+
+    # Check the shared secret before reading the call count.
+    _verify_devops_secret(
+        configured_secret=DOGRAH_DEVOPS_SECRET,
+        provided_secret=x_dograh_devops_secret,
+    )
+    in_flight = active_call_count()
+    return ActiveCallsResponse(active_calls=in_flight)
--- a/api/routes/organization.py
+++ b/api/routes/organization.py
@ -1,15 +1,30 @@
 from typing import List, Optional

-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Query
 from loguru import logger
 from pydantic import BaseModel
 from sqlalchemy.exc import IntegrityError

-from api.constants import DEFAULT_CAMPAIGN_RETRY_CONFIG, DEFAULT_ORG_CONCURRENCY_LIMIT
+from api.constants import (
+    DEFAULT_CAMPAIGN_RETRY_CONFIG,
+    DEFAULT_ORG_CONCURRENCY_LIMIT,
+    DEPLOYMENT_MODE,
+)
 from api.db import db_client
 from api.db.models import UserModel
 from api.db.telephony_configuration_client import TelephonyConfigurationInUseError
 from api.enums import OrganizationConfigurationKey, PostHogEvent
+from api.schemas.ai_model_configuration import (
+    DOGRAH_DEFAULT_LANGUAGE,
+    DOGRAH_DEFAULT_VOICE,
+    DOGRAH_SPEED_MAX,
+    DOGRAH_SPEED_MIN,
+    DOGRAH_SPEED_OPTIONS,
+    DOGRAH_SPEED_STEP,
+    OrganizationAIModelConfigurationResponse,
+    OrganizationAIModelConfigurationV2,
+)
+from api.schemas.organization_preferences import OrganizationPreferences
 from api.schemas.telephony_config import (
    TelephonyConfigRequest,
    TelephonyConfigurationCreateRequest,
@ -26,8 +41,42 @@ from api.schemas.telephony_phone_number import (
    PhoneNumberUpdateRequest,
    ProviderSyncStatus,
 )
-from api.services.auth.depends import get_user
-from api.services.configuration.masking import is_mask_of, mask_key
+from api.services.auth.depends import (
+    _sync_posthog_organization_mps_billing_v2_status,
+    get_user,
+    get_user_with_selected_organization,
+)
+from api.services.configuration.ai_model_configuration import (
+    check_for_masked_keys_in_ai_model_configuration_v2,
+    compile_ai_model_configuration_v2,
+    convert_legacy_ai_model_configuration_to_v2,
+    get_organization_ai_model_configuration_v2,
+    get_resolved_ai_model_configuration,
+    mask_ai_model_configuration_v2,
+    merge_ai_model_configuration_v2_secrets,
+    migrate_workflow_model_configurations_to_v2,
+    upsert_organization_ai_model_configuration_v2,
+)
+from api.services.configuration.check_validity import UserConfigurationValidator
+from api.services.configuration.defaults import DEFAULT_SERVICE_PROVIDERS
+from api.services.configuration.masking import is_mask_of, mask_key, mask_user_config
+from api.services.configuration.registry import (
+    DOGRAH_MULTILINGUAL_AUTODETECT_LANGUAGES,
+    DOGRAH_STT_LANGUAGES,
+    REGISTRY,
+    DograhTTSService,
+    ServiceProviders,
+    ServiceType,
+)
+from api.services.mps_billing import ensure_hosted_mps_billing_account_v2
+from api.services.organization_context import (
+    OrganizationContextResponse,
+    get_organization_context,
+)
+from api.services.organization_preferences import (
+    get_organization_preferences,
+    upsert_organization_preferences,
+)
 from api.services.posthog_client import capture_event
 from api.services.telephony import registry as telephony_registry
 from api.services.telephony.factory import get_telephony_provider_by_id
@ -96,6 +145,13 @@ class TelephonyConfigWarningsResponse(BaseModel):
    """

    telnyx_missing_webhook_public_key_count: int
+    vonage_missing_signature_secret_count: int
+
+
+@router.get("/context", response_model=OrganizationContextResponse)
+async def get_current_organization_context(user: UserModel = Depends(get_user)):
+    """Return organization-scoped configuration signals owned by Dograh."""
+    # Thin route: the authenticated user is passed straight to
+    # get_organization_context(), which builds the response.
+    return await get_organization_context(user)


@router.get(
@ -145,8 +201,7 @@ async def get_telephony_providers_metadata(user: UserModel = Depends(get_user)):
 async def get_telephony_config_warnings(user: UserModel = Depends(get_user)):
    """Return aggregated warning counts for the current org's telephony configs.

-    Today this surfaces only Telnyx configs missing ``webhook_public_key``;
-    additional warning types should be added as new fields on the response.
+    Surfaces provider configs missing webhook-verification credentials.
    """
    if not user.selected_organization_id:
        raise HTTPException(status_code=400, detail="No organization selected")
@ -154,11 +209,271 @@ async def get_telephony_config_warnings(user: UserModel = Depends(get_user)):
    telnyx_missing = await db_client.count_telnyx_configs_missing_webhook_public_key(
        user.selected_organization_id
    )
+    vonage_missing = await db_client.count_vonage_configs_missing_signature_secret(
+        user.selected_organization_id
+    )
    return TelephonyConfigWarningsResponse(
        telnyx_missing_webhook_public_key_count=telnyx_missing,
+        vonage_missing_signature_secret_count=vonage_missing,
    )


+# ---------------------------------------------------------------------------
+# AI model configurations v2
+# ---------------------------------------------------------------------------
+
+
+def _dograh_allows_custom_voice() -> bool:
+    """Report whether the Dograh TTS ``voice`` field accepts custom input.
+
+    Reads the ``allow_custom_input`` flag from the field's
+    ``json_schema_extra``; anything other than a dict there counts as "no".
+    """
+    schema_extra = DograhTTSService.model_fields["voice"].json_schema_extra
+    if not isinstance(schema_extra, dict):
+        return False
+    return bool(schema_extra.get("allow_custom_input", False))
+
+
+def _byok_provider_schemas(service_type: ServiceType) -> dict[str, dict]:
+    """Map each non-Dograh provider of ``service_type`` to its JSON schema."""
+    schemas: dict[str, dict] = {}
+    for provider_name, provider_model in REGISTRY[service_type].items():
+        # The Dograh-managed provider is not a bring-your-own-key option.
+        if provider_name == ServiceProviders.DOGRAH.value:
+            continue
+        schemas[provider_name] = provider_model.model_json_schema()
+    return schemas
+
+
+async def _model_configuration_v2_response(
+    *,
+    user: UserModel,
+    configuration: OrganizationAIModelConfigurationV2 | None = None,
+) -> OrganizationAIModelConfigurationResponse:
+    """Build the masked v2 model-configuration response for ``user``.
+
+    Args:
+        user: User whose id and selected organization drive the resolution.
+        configuration: Configuration to report as the organization's own; when
+            omitted, the resolved organization configuration is used.
+
+    Returns:
+        The response with the stored and effective configurations masked and
+        the resolution source attached.
+    """
+    resolution = await get_resolved_ai_model_configuration(
+        user_id=user.id,
+        organization_id=user.selected_organization_id,
+    )
+    if configuration is None:
+        organization_configuration = resolution.organization_configuration
+    else:
+        organization_configuration = configuration
+
+    masked_configuration = mask_ai_model_configuration_v2(organization_configuration)
+    masked_effective = mask_user_config(resolution.effective)
+    return OrganizationAIModelConfigurationResponse(
+        configuration=masked_configuration,
+        effective_configuration=masked_effective,
+        source=resolution.source,
+    )
+
+
+@router.get("/model-configurations/v2/defaults")
+async def get_model_configuration_v2_defaults(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Returns the static option catalogue for the v2 model-configuration UI:
+    # the Dograh-managed choices plus JSON schemas of every BYOK provider.
+    # `user` is not read in the body; the dependency only gates access.
+
+    # Default provider per service, excluding services whose default is Dograh.
+    byok_default_providers = {
+        service: provider
+        for service, provider in DEFAULT_SERVICE_PROVIDERS.items()
+        if provider != ServiceProviders.DOGRAH.value
+    }
+    return {
+        "dograh": {
+            # Only the default voice is listed; custom input is flagged separately.
+            "voices": [DOGRAH_DEFAULT_VOICE],
+            "allow_custom_input": _dograh_allows_custom_voice(),
+            "speeds": list(DOGRAH_SPEED_OPTIONS),
+            "speed_range": {
+                "min": DOGRAH_SPEED_MIN,
+                "max": DOGRAH_SPEED_MAX,
+                "step": DOGRAH_SPEED_STEP,
+            },
+            "languages": DOGRAH_STT_LANGUAGES,
+            "multilingual_languages": DOGRAH_MULTILINGUAL_AUTODETECT_LANGUAGES,
+            "defaults": {
+                "voice": DOGRAH_DEFAULT_VOICE,
+                "speed": 1.0,
+                "language": DOGRAH_DEFAULT_LANGUAGE,
+            },
+        },
+        "byok": {
+            # Provider schemas for the LLM + TTS + STT pipeline mode.
+            "pipeline": {
+                "llm": _byok_provider_schemas(ServiceType.LLM),
+                "tts": _byok_provider_schemas(ServiceType.TTS),
+                "stt": _byok_provider_schemas(ServiceType.STT),
+                "embeddings": _byok_provider_schemas(ServiceType.EMBEDDINGS),
+                "default_providers": byok_default_providers,
+            },
+            # Provider schemas for the realtime-model mode.
+            "realtime": {
+                "realtime": _byok_provider_schemas(ServiceType.REALTIME),
+                "llm": _byok_provider_schemas(ServiceType.LLM),
+                "embeddings": _byok_provider_schemas(ServiceType.EMBEDDINGS),
+                "default_providers": byok_default_providers,
+            },
+        },
+    }
+
+
+@router.get(
+    "/model-configurations/v2",
+    response_model=OrganizationAIModelConfigurationResponse,
+)
+async def get_model_configuration_v2(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # No explicit configuration is passed, so the helper reports the resolved
+    # organization configuration alongside the masked effective configuration.
+    return await _model_configuration_v2_response(user=user)
+
+
+@router.put(
+    "/model-configurations/v2",
+    response_model=OrganizationAIModelConfigurationResponse,
+)
+async def save_model_configuration_v2(
+    request: OrganizationAIModelConfigurationV2,
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Flow: merge the request with the stored configuration's secrets, validate
+    # the merged result, persist it, then return the masked response.
+    # Raises HTTPException 422 when any validation step raises ValueError.
+    organization_id = user.selected_organization_id
+    existing = await get_organization_ai_model_configuration_v2(organization_id)
+    configuration = merge_ai_model_configuration_v2_secrets(request, existing)
+    try:
+        check_for_masked_keys_in_ai_model_configuration_v2(configuration)
+        effective = compile_ai_model_configuration_v2(configuration)
+        await UserConfigurationValidator().validate(
+            effective,
+            organization_id=organization_id,
+            created_by=user.provider_id,
+        )
+    except ValueError as exc:
+        # A ValueError raised without arguments has an empty ``args`` tuple;
+        # fall back to str(exc) rather than failing with IndexError (a 500).
+        detail = exc.args[0] if exc.args else str(exc)
+        raise HTTPException(status_code=422, detail=detail) from exc
+
+    await upsert_organization_ai_model_configuration_v2(
+        organization_id,
+        configuration,
+    )
+    return await _model_configuration_v2_response(
+        user=user,
+        configuration=configuration,
+    )
+
+
+@router.get("/model-configurations/v2/migration-preview")
+async def preview_model_configuration_v2_migration(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Dry run of the legacy -> v2 conversion for the calling user's stored
+    # configuration: nothing is persisted, and both views come back masked.
+    # Raises HTTPException 422 when conversion or compilation raises ValueError.
+    legacy = await db_client.get_user_configurations(user.id)
+    try:
+        configuration = convert_legacy_ai_model_configuration_to_v2(legacy)
+        # Compile inside the try block, as the save and migrate routes do, so a
+        # configuration that converts but does not compile yields a 422
+        # instead of an unhandled 500.
+        effective = compile_ai_model_configuration_v2(configuration)
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail=str(exc)) from exc
+    return {
+        "configuration": mask_ai_model_configuration_v2(configuration),
+        "effective_configuration": mask_user_config(effective),
+    }
+
+
+@router.post(
+    "/model-configurations/v2/migrate",
+    response_model=OrganizationAIModelConfigurationResponse,
+)
+async def migrate_model_configuration_v2(
+    force: bool = Query(default=False),
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Converts the calling user's legacy configuration to v2 and stores it for
+    # the selected organization.
+    #   409 - a v2 configuration already exists and `force` is false
+    #   422 - conversion, compilation, or validation raised ValueError
+    #   502 - hosted billing account initialisation failed (non-OSS only)
+    organization_id = user.selected_organization_id
+    existing = await get_organization_ai_model_configuration_v2(organization_id)
+    if existing is not None and not force:
+        raise HTTPException(
+            status_code=409,
+            detail="Organization already has a v2 model configuration",
+        )
+
+    legacy = await db_client.get_user_configurations(user.id)
+    try:
+        configuration = convert_legacy_ai_model_configuration_to_v2(legacy)
+        effective = compile_ai_model_configuration_v2(configuration)
+        await UserConfigurationValidator().validate(
+            effective,
+            organization_id=organization_id,
+            created_by=user.provider_id,
+        )
+    except ValueError as exc:
+        # A ValueError raised without arguments has an empty ``args`` tuple;
+        # fall back to str(exc) rather than failing with IndexError (a 500).
+        detail = exc.args[0] if exc.args else str(exc)
+        raise HTTPException(status_code=422, detail=detail) from exc
+
+    # The billing account is ensured before anything is written, so a billing
+    # failure aborts the migration without a stored v2 configuration.
+    billing_account_status = None
+    if DEPLOYMENT_MODE != "oss":
+        try:
+            billing_account_status = await ensure_hosted_mps_billing_account_v2(
+                organization_id,
+                created_by=str(user.provider_id),
+            )
+        except Exception as exc:
+            logger.error(
+                "Failed to initialize MPS billing v2 account for organization {}: {}",
+                organization_id,
+                exc,
+            )
+            raise HTTPException(
+                status_code=502,
+                detail="Failed to initialize MPS billing v2 account",
+            ) from exc
+
+    await upsert_organization_ai_model_configuration_v2(
+        organization_id,
+        configuration,
+    )
+    await migrate_workflow_model_configurations_to_v2(
+        organization_id=organization_id,
+        fallback_user_config=legacy,
+    )
+    if DEPLOYMENT_MODE != "oss":
+        # Report to PostHog whether the organization ended up on billing v2.
+        _sync_posthog_organization_mps_billing_v2_status(
+            organization_id,
+            uses_mps_billing_v2=bool(
+                billing_account_status
+                and billing_account_status.get("billing_mode") == "v2"
+            ),
+        )
+    return await _model_configuration_v2_response(
+        user=user,
+        configuration=configuration,
+    )
+
+
+@router.get("/preferences", response_model=OrganizationPreferences)
+async def get_preferences(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Returns the preferences of the user's selected organization. Also called
+    # directly by the legacy /model-configurations/preferences GET route.
+    organization_id = user.selected_organization_id
+    return await get_organization_preferences(organization_id)
+
+
+@router.put("/preferences", response_model=OrganizationPreferences)
+async def save_preferences(
+    request: OrganizationPreferences,
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Upserts the submitted preferences for the user's selected organization and
+    # returns what upsert_organization_preferences() reports. Also called
+    # directly by the legacy /model-configurations/preferences PUT route.
+    organization_id = user.selected_organization_id
+    return await upsert_organization_preferences(
+        organization_id,
+        request,
+    )
+
+
+@router.get(
+    "/model-configurations/preferences",
+    response_model=OrganizationPreferences,
+    include_in_schema=False,
+)
+async def get_model_configuration_preferences_legacy(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Alias of GET /preferences under the older path; hidden from the OpenAPI
+    # schema (include_in_schema=False) and delegating to the same handler.
+    return await get_preferences(user=user)
+
+
+@router.put(
+    "/model-configurations/preferences",
+    response_model=OrganizationPreferences,
+    include_in_schema=False,
+)
+async def save_model_configuration_preferences_legacy(
+    request: OrganizationPreferences,
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    # Alias of PUT /preferences under the older path; hidden from the OpenAPI
+    # schema (include_in_schema=False) and delegating to the same handler.
+    return await save_preferences(request=request, user=user)
+
+
 def preserve_masked_fields(provider: str, request_dict: dict, existing: dict):
    """If the client re-submitted a masked sensitive field, restore the original."""
    for field_name in _sensitive_fields(provider):
--- a/api/routes/organization_usage.py
+++ b/api/routes/organization_usage.py
@ -1,18 +1,20 @@
 import json
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Literal, Optional

 from fastapi import APIRouter, Depends, HTTPException, Query
 from fastapi.responses import StreamingResponse
 from loguru import logger
 from pydantic import BaseModel, Field

-from api.constants import DEPLOYMENT_MODE
+from api.constants import DEPLOYMENT_MODE, UI_APP_URL
 from api.db import db_client
 from api.db.models import UserModel
-from api.services.auth.depends import get_user
+from api.services.auth.depends import get_user, get_user_with_selected_organization
 from api.services.mps_service_key_client import mps_service_key_client
 from api.services.reports import generate_usage_runs_report_csv
+from api.utils.artifacts import artifact_url
+from api.utils.recording_artifacts import has_recording_track

 router = APIRouter(prefix="/organizations")

@ -21,14 +23,8 @@ class CurrentUsageResponse(BaseModel):
    period_start: str
    period_end: str
    used_dograh_tokens: float
-    quota_dograh_tokens: int
-    percentage_used: float
-    next_refresh_date: str
-    quota_enabled: bool
    total_duration_seconds: int
-    # New USD fields
    used_amount_usd: Optional[float] = None
-    quota_amount_usd: Optional[float] = None
    currency: Optional[str] = None
    price_per_second_usd: Optional[float] = None

@ -39,6 +35,61 @@ class MPSCreditsResponse(BaseModel):
    total_quota: float


+# Response body of POST /usage/mps-credits/purchase-url.
+class MPSCreditPurchaseUrlResponse(BaseModel):
+    # The "checkout_url" value taken from the MPS checkout-session response.
+    checkout_url: str
+
+
+# Billing account summary embedded in the v2 billing-credits response; built
+# from the "account" object of the MPS credit-ledger payload.
+class MPSBillingAccountResponse(BaseModel):
+    id: int
+    organization_id: int
+    billing_mode: str
+    # Account balance in credits as reported by the ledger payload.
+    cached_balance_credits: float
+    # The route handler substitutes "USD" when the payload omits a currency.
+    currency: str
+
+
+# One row of the MPS credit ledger as returned by GET /billing/credits.
+class MPSCreditLedgerEntryResponse(BaseModel):
+    id: int
+    entry_type: str
+    origin: Optional[str] = None
+    # Signed change in credits; the route handler counts negative values as debits.
+    credits_delta: float
+    balance_after: float
+    amount_minor: Optional[int] = None
+    amount_currency: Optional[str] = None
+    payment_order_id: Optional[int] = None
+    metric_code: Optional[str] = None
+    correlation_id: Optional[str] = None
+    aggregation_key: Optional[str] = None
+    usage_event_id: Optional[int] = None
+    workflow_run_id: Optional[int] = None
+    # Filled by the route handler only when the run's workflow belongs to the
+    # requesting organization.
+    workflow_id: Optional[int] = None
+    billable_quantity: Optional[float] = None
+    quantity_unit: Optional[str] = None
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    # Stringified form of the ledger payload's "created_at" value.
+    created_at: str
+
+
+# Response body of GET /billing/credits. Legacy responses populate only the
+# three credit totals; v2 responses add the account, ledger page and paging data.
+class MPSBillingCreditsResponse(BaseModel):
+    billing_version: Literal["legacy", "v2"]
+    total_credits_used: float = 0.0
+    remaining_credits: float = 0.0
+    # Computed by the handlers as used + remaining credits.
+    total_quota: float = 0.0
+    account: Optional[MPSBillingAccountResponse] = None
+    ledger_entries: List[MPSCreditLedgerEntryResponse] = Field(default_factory=list)
+    total_count: int = 0
+    page: int = 1
+    limit: int = 50
+    total_pages: int = 0
+
+
+def _optional_int(value: Any) -> Optional[int]:
+    """Convert ``value`` to ``int``, returning ``None`` when that is impossible.
+
+    Args:
+        value: Any object; typically a field of an externally supplied payload.
+
+    Returns:
+        ``int(value)``, or ``None`` when ``value`` is ``None`` or not convertible.
+    """
+    if value is None:
+        return None
+    try:
+        return int(value)
+    # OverflowError covers int(float("inf")); without it an infinite value in
+    # the payload would escape this best-effort helper as an exception.
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+
 class WorkflowRunUsageResponse(BaseModel):
    id: int
    workflow_id: int
@ -49,6 +100,13 @@ class WorkflowRunUsageResponse(BaseModel):
    call_duration_seconds: int
    recording_url: Optional[str] = None
    transcript_url: Optional[str] = None
+    user_recording_url: Optional[str] = None
+    bot_recording_url: Optional[str] = None
+    recording_public_url: Optional[str] = None
+    transcript_public_url: Optional[str] = None
+    user_recording_public_url: Optional[str] = None
+    bot_recording_public_url: Optional[str] = None
+    public_access_token: Optional[str] = None
    phone_number: Optional[str] = Field(
        default=None,
        deprecated=True,
@ -93,7 +151,7 @@ class DailyUsageBreakdownResponse(BaseModel):

@router.get("/usage/current-period", response_model=CurrentUsageResponse)
 async def get_current_period_usage(user: UserModel = Depends(get_user)):
-    """Get current billing period usage for the user's organization."""
+    """Get current reporting-period usage for the user's organization."""
    if not user.selected_organization_id:
        raise HTTPException(status_code=400, detail="No organization selected")

@ -138,6 +196,206 @@ async def get_mps_credits(user: UserModel = Depends(get_user)):
        raise HTTPException(status_code=500, detail=str(e))


+async def _get_mps_billing_account_status(
+    user: UserModel, organization_id: int
+) -> Optional[dict]:
+    """Fetch the MPS billing account status for ``organization_id``.
+
+    The request is attributed to the calling user's ``provider_id``.
+    """
+    account_status = await mps_service_key_client.get_billing_account_status(
+        organization_id=organization_id,
+        created_by=str(user.provider_id),
+    )
+    return account_status
+
+
+def _is_mps_billing_v2(account: Optional[dict]) -> bool:
+    """Return True only for an account whose ``billing_mode`` is ``"v2"``."""
+    # A missing or empty account is never v2.
+    if not account:
+        return False
+    return bool(account.get("billing_mode") == "v2")
+
+
+async def _legacy_mps_credits_response(user: UserModel) -> MPSBillingCreditsResponse:
+    """Build the ``"legacy"`` credits response for ``user``.
+
+    In OSS mode usage is looked up by the user's ``provider_id``; otherwise by
+    the selected organization.
+
+    Raises:
+        HTTPException: 400 when not in OSS mode and no organization is selected.
+    """
+    if DEPLOYMENT_MODE != "oss":
+        selected_organization_id = user.selected_organization_id
+        if not selected_organization_id:
+            raise HTTPException(status_code=400, detail="No organization selected")
+        usage = await mps_service_key_client.get_usage_by_organization(
+            selected_organization_id
+        )
+    else:
+        created_by = str(user.provider_id)
+        usage = await mps_service_key_client.get_usage_by_created_by(created_by)
+
+    credits_used = float(usage.get("total_credits_used", 0.0))
+    credits_remaining = float(usage.get("remaining_credits", 0.0))
+    return MPSBillingCreditsResponse(
+        billing_version="legacy",
+        total_credits_used=credits_used,
+        remaining_credits=credits_remaining,
+        total_quota=credits_used + credits_remaining,
+    )
+
+
+@router.get("/billing/credits", response_model=MPSBillingCreditsResponse)
+async def get_billing_credits(
+    page: int = Query(1, ge=1),
+    limit: int = Query(50, ge=1, le=100),
+    user: UserModel = Depends(get_user),
+):
+    """Return legacy MPS credits or paginated v2 billing ledger details for the org."""
+    try:
+        # OSS deployments and users without a selected organization always get
+        # the legacy totals-only response.
+        if DEPLOYMENT_MODE == "oss" or not user.selected_organization_id:
+            return await _legacy_mps_credits_response(user)
+
+        organization_id = user.selected_organization_id
+        account_status = await _get_mps_billing_account_status(user, organization_id)
+        # Organizations whose account is not on billing mode "v2" also fall back.
+        if not _is_mps_billing_v2(account_status):
+            return await _legacy_mps_credits_response(user)
+
+        ledger = await mps_service_key_client.get_credit_ledger(
+            organization_id=organization_id,
+            page=page,
+            limit=limit,
+            created_by=str(user.provider_id),
+        )
+        account = ledger.get("account") or {}
+        ledger_entries = ledger.get("ledger_entries") or []
+        # Paging values prefer what the ledger reports and fall back to values
+        # derived from the request and the entries actually returned.
+        total_count = int(ledger.get("total_count") or len(ledger_entries))
+        response_limit = int(ledger.get("limit") or limit)
+        total_pages = int(
+            ledger.get("total_pages")
+            or ((total_count + response_limit - 1) // response_limit)
+        )
+        # Resolve workflow ids for the runs referenced on this page. Runs whose
+        # workflow belongs to another organization are left unmapped, so their
+        # workflow_id is reported as None.
+        workflow_ids_by_run_id: dict[int, int] = {}
+        workflow_run_ids = {
+            workflow_run_id
+            for entry in ledger_entries
+            if (workflow_run_id := _optional_int(entry.get("workflow_run_id")))
+            is not None
+        }
+        # One lookup per distinct run id on the page.
+        for workflow_run_id in workflow_run_ids:
+            workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id)
+            if (
+                workflow_run
+                and workflow_run.workflow
+                and workflow_run.workflow.organization_id == organization_id
+            ):
+                workflow_ids_by_run_id[workflow_run_id] = workflow_run.workflow_id
+
+        balance = float(account.get("cached_balance_credits") or 0.0)
+        # Page-local sum of debits (negative deltas); replaced below by the
+        # ledger's own total when the payload provides one.
+        total_debits = sum(
+            abs(float(entry.get("credits_delta") or 0.0))
+            for entry in ledger_entries
+            if float(entry.get("credits_delta") or 0.0) < 0
+        )
+        if ledger.get("total_debits_credits") is not None:
+            total_debits = float(ledger["total_debits_credits"])
+
+        return MPSBillingCreditsResponse(
+            billing_version="v2",
+            total_credits_used=total_debits,
+            remaining_credits=balance,
+            total_quota=balance + total_debits,
+            # "id", "organization_id" and "billing_mode" are required keys: a
+            # payload without them raises KeyError, handled as a 500 below.
+            account=MPSBillingAccountResponse(
+                id=int(account["id"]),
+                organization_id=int(account["organization_id"]),
+                billing_mode=str(account["billing_mode"]),
+                cached_balance_credits=balance,
+                currency=str(account.get("currency") or "USD"),
+            ),
+            ledger_entries=[
+                MPSCreditLedgerEntryResponse(
+                    id=int(entry["id"]),
+                    entry_type=str(entry["entry_type"]),
+                    origin=entry.get("origin"),
+                    credits_delta=float(entry.get("credits_delta") or 0.0),
+                    balance_after=float(entry.get("balance_after") or 0.0),
+                    amount_minor=entry.get("amount_minor"),
+                    amount_currency=entry.get("amount_currency"),
+                    payment_order_id=entry.get("payment_order_id"),
+                    metric_code=entry.get("metric_code"),
+                    correlation_id=entry.get("correlation_id"),
+                    aggregation_key=entry.get("aggregation_key"),
+                    usage_event_id=_optional_int(entry.get("usage_event_id")),
+                    workflow_run_id=_optional_int(entry.get("workflow_run_id")),
+                    workflow_id=(
+                        workflow_ids_by_run_id.get(
+                            _optional_int(entry.get("workflow_run_id"))
+                        )
+                        if entry.get("workflow_run_id") is not None
+                        else None
+                    ),
+                    billable_quantity=(
+                        float(entry["billable_quantity"])
+                        if entry.get("billable_quantity") is not None
+                        else None
+                    ),
+                    quantity_unit=entry.get("quantity_unit"),
+                    metadata=entry.get("metadata") or {},
+                    created_at=str(entry["created_at"]),
+                )
+                for entry in ledger_entries
+            ],
+            total_count=total_count,
+            page=int(ledger.get("page") or page),
+            limit=response_limit,
+            total_pages=total_pages,
+        )
+    except HTTPException:
+        # Deliberate HTTP errors (e.g. the 400 from the legacy helper) pass through.
+        raise
+    except Exception as exc:
+        # Any other failure is logged and reported as a 500 whose detail is the
+        # exception text.
+        logger.error(f"Failed to fetch billing credits: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+
+
+@router.post(
+    "/usage/mps-credits/purchase-url",
+    response_model=MPSCreditPurchaseUrlResponse,
+)
+async def create_mps_credit_purchase_url(
+    user: UserModel = Depends(get_user_with_selected_organization),
+):
+    """Create a checkout URL for organizations using Dograh-managed MPS v2."""
+    # Status codes produced here:
+    #   404 - OSS deployment
+    #   403 - organization is not on billing mode "v2"
+    #   502 - MPS call failed or returned no checkout_url
+    if DEPLOYMENT_MODE == "oss":
+        raise HTTPException(
+            status_code=404,
+            detail="Credit purchases are not available in OSS mode",
+        )
+
+    organization_id = user.selected_organization_id
+    # NOTE(review): presumably guaranteed by get_user_with_selected_organization;
+    # an assert is skipped when Python runs with -O — confirm this is acceptable.
+    assert organization_id is not None
+    account_status = await _get_mps_billing_account_status(user, organization_id)
+    if not _is_mps_billing_v2(account_status):
+        raise HTTPException(
+            status_code=403,
+            detail=(
+                "Credit purchases are available only for organizations using billing v2"
+            ),
+        )
+
+    try:
+        session = await mps_service_key_client.create_credit_purchase_url(
+            organization_id=organization_id,
+            created_by=str(user.provider_id),
+            # The return URL is the UI's /billing page.
+            return_url=f"{UI_APP_URL.rstrip('/')}/billing",
+            billing_details={
+                "source": "dograh_billing",
+                "dograh_user_id": str(user.id),
+                "dograh_provider_id": str(user.provider_id),
+            },
+        )
+    except Exception as exc:
+        # The underlying error is logged; the client only sees a generic 502.
+        logger.error(f"Failed to create MPS credit purchase URL: {exc}")
+        raise HTTPException(
+            status_code=502,
+            detail="Failed to create credit purchase URL",
+        )
+
+    checkout_url = session.get("checkout_url")
+    if not checkout_url:
+        # A response without a checkout_url is treated as an upstream failure.
+        logger.error(f"MPS checkout session response missing checkout_url: {session}")
+        raise HTTPException(
+            status_code=502,
+            detail="MPS checkout session response missing checkout_url",
+        )
+    return MPSCreditPurchaseUrlResponse(checkout_url=checkout_url)
+
+
 FILTERS_DESCRIPTION = """\
 JSON-encoded array of filter objects. Each object has the shape:

@ -223,6 +481,24 @@ async def get_usage_history(

        total_pages = (total_count + limit - 1) // limit

+        for run in runs:
+            public_access_token = run.get("public_access_token")
+            run["transcript_public_url"] = artifact_url(
+                public_access_token, "transcript"
+            )
+            run["recording_public_url"] = artifact_url(public_access_token, "recording")
+            run["user_recording_public_url"] = (
+                artifact_url(public_access_token, "user_recording")
+                if has_recording_track(run.get("extra"), "user")
+                else None
+            )
+            run["bot_recording_public_url"] = (
+                artifact_url(public_access_token, "bot_recording")
+                if has_recording_track(run.get("extra"), "bot")
+                else None
+            )
+            run.pop("extra", None)
+
        return {
            "runs": runs,
            "total_dograh_tokens": total_tokens,
--- a/api/routes/public_agent.py
+++ b/api/routes/public_agent.py
@ -14,7 +14,7 @@ from pydantic import BaseModel

 from api.db import db_client
 from api.enums import TriggerState, WorkflowStatus
-from api.services.quota_service import check_dograh_quota_by_user_id
+from api.services.quota_service import authorize_workflow_run_start
 from api.services.telephony.factory import (
    get_default_telephony_provider,
    get_telephony_provider_by_id,
@ -179,14 +179,6 @@ async def _execute_resolved_target(
    """Shared execution path once the target workflow has been resolved."""
    execution_user_id = _get_execution_user_id(target.workflow)

-    # Check Dograh quota using the workflow owner's config and model overrides.
-    quota_result = await check_dograh_quota_by_user_id(
-        execution_user_id,
-        workflow_id=target.workflow.id,
-    )
-    if not quota_result.has_quota:
-        raise HTTPException(status_code=402, detail=quota_result.error_message)
-
    # Get telephony provider — either the caller-specified config (validated
    # against the workflow's org) or the org's default config.
    if request.telephony_configuration_id is not None:
@ -268,6 +260,15 @@ async def _execute_resolved_target(
        f"to phone number {request.phone_number}"
    )

+    # Check Dograh quota after the run exists so hosted v2 can mint and store
+    # the MPS correlation id before the provider starts the call.
+    quota_result = await authorize_workflow_run_start(
+        workflow_id=target.workflow.id,
+        workflow_run_id=workflow_run.id,
+    )
+    if not quota_result.has_quota:
+        raise HTTPException(status_code=402, detail=quota_result.error_message)
+
    # 9. Construct webhook URL for telephony provider callback
    backend_endpoint, _ = await get_backend_endpoints()
    webhook_endpoint = provider.WEBHOOK_ENDPOINT
--- a/api/routes/public_download.py
+++ b/api/routes/public_download.py
@ -6,14 +6,16 @@ post-call processing for runs that execute integrations, QA, or campaign
 reporting.
 """

-from typing import Literal
-
 from fastapi import APIRouter, HTTPException, Query
 from fastapi.responses import RedirectResponse
 from loguru import logger

 from api.db import db_client
 from api.services.storage import get_storage_for_backend
+from api.utils.recording_artifacts import (
+    get_recording_storage_backend,
+    get_recording_storage_key,
+)

 router = APIRouter(prefix="/public/download")

@ -21,7 +23,7 @@ router = APIRouter(prefix="/public/download")
@router.get("/workflow/{token}/{artifact_type}")
 async def download_workflow_artifact(
    token: str,
-    artifact_type: Literal["recording", "transcript"],
+    artifact_type: str,
    inline: bool = Query(
        default=False, description="Display inline in browser instead of download"
    ),
@ -36,13 +38,15 @@ async def download_workflow_artifact(

    Args:
        token: The public access token (UUID format)
-        artifact_type: Type of artifact - "recording" or "transcript"
+        artifact_type: Type of artifact - "recording", "transcript",
+            "user_recording", or "bot_recording"
        inline: If true, sets Content-Disposition to inline for browser preview

    Returns:
        RedirectResponse to the signed URL (302 redirect)

    Raises:
+        HTTPException 400: If artifact type is unsupported
        HTTPException 404: If token is invalid or artifact not found
    """
    # 1. Lookup workflow run by token
@ -52,10 +56,26 @@ async def download_workflow_artifact(
        raise HTTPException(status_code=404, detail="Invalid or expired token")

    # 2. Get file path based on artifact type
+    artifact_storage_backend = None
    if artifact_type == "recording":
        file_path = workflow_run.recording_url
-    else:  # transcript
+    elif artifact_type == "transcript":
        file_path = workflow_run.transcript_url
+    elif artifact_type == "user_recording":
+        file_path = get_recording_storage_key(workflow_run.extra, "user")
+        artifact_storage_backend = get_recording_storage_backend(
+            workflow_run.extra, "user"
+        )
+    elif artifact_type == "bot_recording":
+        file_path = get_recording_storage_key(workflow_run.extra, "bot")
+        artifact_storage_backend = get_recording_storage_backend(
+            workflow_run.extra, "bot"
+        )
+    else:
+        logger.warning(
+            f"Unsupported artifact type: type={artifact_type}, workflow_run_id={workflow_run.id}"
+        )
+        raise HTTPException(status_code=400, detail="Unsupported artifact type")

    if not file_path:
        logger.warning(
@ -68,7 +88,9 @@ async def download_workflow_artifact(

    # 3. Get storage backend for this workflow run
    try:
-        storage = get_storage_for_backend(workflow_run.storage_backend)
+        storage = get_storage_for_backend(
+            artifact_storage_backend or workflow_run.storage_backend
+        )
    except ValueError as e:
        logger.error(f"Invalid storage backend: {workflow_run.storage_backend}")
        raise HTTPException(status_code=500, detail="Storage configuration error")
--- a/api/routes/public_embed.py
+++ b/api/routes/public_embed.py
@ -7,6 +7,7 @@ They handle CORS, domain validation, and session management for embedded workflo
 import secrets
 from datetime import UTC, datetime, timedelta
 from typing import Optional
+from urllib.parse import urlsplit

 from fastapi import (
    APIRouter,
@ -16,6 +17,8 @@ from fastapi import (
 )
 from loguru import logger
 from pydantic import BaseModel
+from starlette.datastructures import Headers
+from starlette.types import ASGIApp, Receive, Scope, Send

 from api.db import db_client
 from api.enums import WorkflowRunMode
@ -27,6 +30,9 @@ from api.routes.turn_credentials import (

 router = APIRouter(prefix="/public/embed")

+EMBED_CORS_ALLOW_HEADERS = "Content-Type, Origin"
+EMBED_CORS_MAX_AGE = "86400"
+

 class InitEmbedRequest(BaseModel):
    """Request model for initializing an embed session"""
@ -70,11 +76,9 @@ def validate_origin(origin: str, allowed_domains: list) -> bool:
        # If no domains specified, allow all origins
        return True

-    # Extract domain from origin (remove protocol)
-    if "://" in origin:
-        domain = origin.split("://")[1].split("/")[0].split(":")[0]
-    else:
-        domain = origin
+    domain, origin_port = _parse_origin_host_port(origin)
+    if not domain:
+        return False

    # Normalize domain for www matching
    def normalize_www(d: str) -> tuple[str, str]:
@ -87,16 +91,23 @@ def validate_origin(origin: str, allowed_domains: list) -> bool:
    domain_variants = normalize_www(domain)

    for allowed in allowed_domains:
+        allowed = str(allowed).strip().lower()
        if allowed == "*":
            return True
-        elif allowed.startswith("*."):
+        allowed_domain, allowed_port = _parse_origin_host_port(allowed)
+        if not allowed_domain:
+            continue
+        if allowed_port is not None and allowed_port != origin_port:
+            continue
+
+        if allowed_domain.startswith("*."):
            # Wildcard subdomain matching
-            base_domain = allowed[2:]
+            base_domain = allowed_domain[2:]
            if domain == base_domain or domain.endswith("." + base_domain):
                return True
        else:
            # Check both www and non-www versions
-            allowed_variants = normalize_www(allowed)
+            allowed_variants = normalize_www(allowed_domain)
            # If any variant of domain matches any variant of allowed, it's valid
            if any(
                dv in allowed_variants or av in domain_variants
@ -108,6 +119,24 @@ def validate_origin(origin: str, allowed_domains: list) -> bool:
    return False


+def _parse_origin_host_port(value: str) -> tuple[str, str | None]:
+    """Split an origin or allow-list entry into ``(hostname, port)``.
+
+    The value is trimmed and lower-cased, and a scheme-less entry such as
+    ``example.com:8080`` is prefixed with ``//`` so ``urlsplit`` treats it as a
+    network location rather than a path.
+
+    Args:
+        value: An ``Origin`` header value or a configured allowed-domain entry.
+
+    Returns:
+        ``(hostname, port)`` where ``hostname`` has any trailing dot removed and
+        ``port`` is the decimal port as a string, or ``None`` when no port was
+        given. Blank or malformed input yields ``("", None)``; an empty
+        hostname is what ``validate_origin`` checks for to reject or skip.
+    """
+    candidate = value.strip().lower()
+    if not candidate:
+        return "", None
+
+    if "://" not in candidate and not candidate.startswith("//"):
+        candidate = f"//{candidate}"
+
+    # Both urlsplit() (unbalanced IPv6 brackets) and .port (non-numeric or
+    # out-of-range port) raise ValueError. The input can come straight from a
+    # request header, so fail closed instead of raising or dropping the port.
+    try:
+        parsed = urlsplit(candidate)
+        parsed_port = parsed.port
+    except ValueError:
+        return "", None
+
+    port = str(parsed_port) if parsed_port is not None else None
+    return (parsed.hostname or "").rstrip("."), port
+
+
 def generate_session_token() -> str:
    """Generate a cryptographically secure session token"""
    return f"emb_session_{secrets.token_urlsafe(32)}"
@ -121,8 +150,120 @@ def get_request_origin(request: Request) -> str:
    return origin


+def _cors_response(origin: str, methods: str) -> Response:
+    """Build a response carrying the embed CORS headers.
+
+    Args:
+        origin: Value echoed back in ``Access-Control-Allow-Origin``.
+        methods: Comma-separated list for ``Access-Control-Allow-Methods``.
+
+    Returns:
+        A ``Response`` whose headers hold the allow-origin, allow-methods,
+        allow-headers and max-age values plus ``Vary: Origin``.
+    """
+    cors_headers = {
+        "Access-Control-Allow-Origin": origin,
+        "Access-Control-Allow-Methods": methods,
+        "Access-Control-Allow-Headers": EMBED_CORS_ALLOW_HEADERS,
+        "Access-Control-Max-Age": EMBED_CORS_MAX_AGE,
+        "Vary": "Origin",
+    }
+    return Response(headers=cors_headers)
+
+
+def _allow_embed_origin(response: Response, origin: str) -> None:
+    """Mark ``response`` as readable by ``origin`` and keep ``Vary`` correct.
+
+    Sets ``Access-Control-Allow-Origin`` to ``origin`` and makes sure ``Origin``
+    is listed in the ``Vary`` header, preserving any values already present.
+
+    Args:
+        response: The outgoing response whose headers are mutated in place.
+        origin: The validated request origin to echo back.
+    """
+    response.headers["Access-Control-Allow-Origin"] = origin
+
+    existing_vary = response.headers.get("Vary")
+    if existing_vary:
+        # Compare case-insensitively so "origin" / "Origin" are not duplicated.
+        listed = [entry.strip().lower() for entry in existing_vary.split(",")]
+        if "origin" not in listed:
+            response.headers["Vary"] = f"{existing_vary}, Origin"
+    else:
+        response.headers["Vary"] = "Origin"
+
+
+async def _config_preflight_response(token: str, origin: str) -> Response:
+    """Answer an OPTIONS request for the embed config endpoint.
+
+    Args:
+        token: Embed token taken from the request path.
+        origin: Value of the request's ``Origin`` header.
+
+    Returns:
+        A CORS response allowing ``GET, OPTIONS`` when the token exists, is
+        active and ``origin`` passes ``validate_origin``; otherwise a bare 403.
+    """
+    token_record = await db_client.get_embed_token_by_token(token)
+
+    token_usable = token_record and token_record.is_active
+    if token_usable and validate_origin(origin, token_record.allowed_domains or []):
+        return _cors_response(origin, "GET, OPTIONS")
+
+    return Response(status_code=403)
+
+
+async def _turn_credentials_preflight_response(
+    session_token: str, origin: str
+) -> Response:
+    """Answer an OPTIONS request for the TURN credentials endpoint.
+
+    Args:
+        session_token: Embed session token taken from the request path.
+        origin: Value of the request's ``Origin`` header.
+
+    Returns:
+        A CORS response allowing ``GET, OPTIONS`` when the session exists, has
+        not expired, its embed token can be loaded and ``origin`` passes
+        ``validate_origin``; otherwise a bare 403.
+    """
+    forbidden_status = 403
+
+    session_row = await db_client.get_embed_session_by_token(session_token)
+    if not session_row:
+        return Response(status_code=forbidden_status)
+
+    # A session without an expiry is treated as still valid.
+    expiry = session_row.expires_at
+    if expiry and expiry < datetime.now(UTC):
+        return Response(status_code=forbidden_status)
+
+    token_row = await db_client.get_embed_token_by_id(session_row.embed_token_id)
+    if not token_row or not validate_origin(origin, token_row.allowed_domains or []):
+        return Response(status_code=forbidden_status)
+
+    return _cors_response(origin, "GET, OPTIONS")
+
+
+async def build_public_embed_preflight_response(
+    path: str, origin: str, requested_method: str, api_prefix: str = "/api/v1"
+) -> Response | None:
+    """Handle embed preflights before global CORSMiddleware rejects external sites.
+
+    Args:
+        path: Request path as seen by the ASGI app.
+        origin: Value of the request's ``Origin`` header.
+        requested_method: Value of ``Access-Control-Request-Method``.
+        api_prefix: Prefix in front of ``/public/embed``; a trailing slash is
+            ignored.
+
+    Returns:
+        A response for the ``init``, ``config/{token}`` and
+        ``turn-credentials/{session_token}`` routes (405 when the requested
+        method does not fit the route), or ``None`` for any other path.
+    """
+    embed_root = f"{api_prefix.rstrip('/')}/public/embed"
+    init_path = f"{embed_root}/init"
+    config_root = f"{embed_root}/config/"
+    turn_root = f"{embed_root}/turn-credentials/"
+
+    if path == init_path:
+        # The init token travels in the POST body, so nothing can be validated
+        # at preflight time beyond the method.
+        if requested_method.upper() == "POST":
+            return _cors_response(origin, "POST, OPTIONS")
+        return Response(status_code=405)
+
+    if path.startswith(config_root):
+        if requested_method.upper() != "GET":
+            return Response(status_code=405)
+        # Only the first path segment after the prefix is the token.
+        token, _, _ = path.removeprefix(config_root).partition("/")
+        return await _config_preflight_response(token, origin)
+
+    if path.startswith(turn_root):
+        if requested_method.upper() != "GET":
+            return Response(status_code=405)
+        session_token, _, _ = path.removeprefix(turn_root).partition("/")
+        return await _turn_credentials_preflight_response(session_token, origin)
+
+    return None
+
+
+class PublicEmbedCORSMiddleware:
+    """Allow token-gated embed CORS before global SaaS CORS rejects preflights.
+
+    Only HTTP ``OPTIONS`` requests that carry both ``Origin`` and
+    ``Access-Control-Request-Method`` are inspected; everything else is passed
+    straight to the wrapped app.
+    """
+
+    def __init__(self, app: ASGIApp, api_prefix: str = "/api/v1"):
+        # api_prefix is forwarded to build_public_embed_preflight_response,
+        # which uses it to recognise the public embed paths.
+        self.api_prefix = api_prefix
+        self.app = app
+
+    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+        preflight_candidate = (
+            scope["type"] == "http" and scope.get("method") == "OPTIONS"
+        )
+        if preflight_candidate:
+            request_headers = Headers(scope=scope)
+            origin = request_headers.get("origin")
+            requested_method = request_headers.get("access-control-request-method")
+
+            if origin and requested_method:
+                preflight = await build_public_embed_preflight_response(
+                    scope.get("path", ""), origin, requested_method, self.api_prefix
+                )
+                # None means the path is not an embed route; defer to the app.
+                if preflight is not None:
+                    await preflight(scope, receive, send)
+                    return
+
+        await self.app(scope, receive, send)
+
+
@router.post("/init", response_model=InitEmbedResponse)
-async def initialize_embed_session(request: Request, init_request: InitEmbedRequest):
+async def initialize_embed_session(
+    request: Request, init_request: InitEmbedRequest, response: Response
+):
    """Initialize an embed session with token validation and domain checking.

    This endpoint:
@ -158,6 +299,9 @@ async def initialize_embed_session(request: Request, init_request: InitEmbedRequ
        )
        raise HTTPException(status_code=403, detail=f"Domain not allowed: {origin}")

+    if origin:
+        _allow_embed_origin(response, origin)
+
    # Create workflow run
    try:
        workflow_run = await db_client.create_workflow_run(
@ -165,7 +309,10 @@ async def initialize_embed_session(request: Request, init_request: InitEmbedRequ
            workflow_id=embed_token.workflow_id,
            mode=WorkflowRunMode.SMALLWEBRTC.value,
            user_id=embed_token.created_by,  # Use token creator as run owner
-            initial_context=init_request.context_variables,
+            initial_context={
+                **(init_request.context_variables or {}),
+                "provider": WorkflowRunMode.SMALLWEBRTC.value,
+            },
        )
    except Exception as e:
        logger.error(f"Failed to create workflow run: {e}")
@ -204,8 +351,19 @@ async def initialize_embed_session(request: Request, init_request: InitEmbedRequ
    )


+@router.options("/config/{token}")
+async def options_embed_config(token: str, request: Request):
+    """Fallback OPTIONS handler for the embed config endpoint.
+
+    Browser preflights include Access-Control-Request-Method and are handled by
+    PublicEmbedCORSMiddleware before global CORS. This keeps non-conformant
+    OPTIONS requests on the same validation path.
+    """
+    # A missing Origin header is passed on as an empty string.
+    origin_header = request.headers.get("origin", "")
+    preflight = await _config_preflight_response(token, origin_header)
+    return preflight
+
+
@router.get("/config/{token}", response_model=EmbedConfigResponse)
-async def get_embed_config(token: str, request: Request):
+async def get_embed_config(token: str, request: Request, response: Response):
    """Get embed configuration without creating a session.

    This endpoint is used to fetch widget configuration for display purposes
@ -226,6 +384,11 @@ async def get_embed_config(token: str, request: Request):
    if not validate_origin(origin, embed_token.allowed_domains or []):
        raise HTTPException(status_code=403, detail=f"Domain not allowed: {origin}")

+    # Set CORS header explicitly; the global CORSMiddleware covers only
+    # first-party origins; this endpoint is fetched by external embed sites.
+    if origin:
+        _allow_embed_origin(response, origin)
+
    # Extract settings with defaults
    settings = embed_token.settings or {}

@ -243,24 +406,20 @@ async def get_embed_config(token: str, request: Request):

@router.options("/init")
 async def options_init(request: Request):
-    """Handle CORS preflight for init endpoint"""
+    """Fallback OPTIONS handler for init endpoint."""
+    # Browser preflights are handled by PublicEmbedCORSMiddleware before global CORS.
    # For init endpoint, we need to check the token in the request body
    # But OPTIONS requests don't have body, so we'll be permissive
    # The actual validation happens in the POST request
    origin = request.headers.get("origin", "*")

-    return Response(
-        headers={
-            "Access-Control-Allow-Origin": origin,
-            "Access-Control-Allow-Methods": "POST, OPTIONS",
-            "Access-Control-Allow-Headers": "Content-Type, Origin",
-            "Access-Control-Max-Age": "86400",
-        }
-    )
+    return _cors_response(origin, "POST, OPTIONS")


@router.get("/turn-credentials/{session_token}", response_model=TurnCredentialsResponse)
-async def get_public_turn_credentials(session_token: str, request: Request):
+async def get_public_turn_credentials(
+    session_token: str, request: Request, response: Response
+):
    """Get TURN credentials for an embed session.

    This endpoint allows embedded widgets to obtain TURN server credentials
@ -295,6 +454,9 @@ async def get_public_turn_credentials(session_token: str, request: Request):
        )
        raise HTTPException(status_code=403, detail=f"Domain not allowed: {origin}")

+    if origin:
+        _allow_embed_origin(response, origin)
+
    # Check if TURN is configured
    if not TURN_SECRET:
        raise HTTPException(
@ -316,63 +478,8 @@ async def get_public_turn_credentials(session_token: str, request: Request):

@router.options("/turn-credentials/{session_token}")
 async def options_turn_credentials(request: Request, session_token: str):
-    """Handle CORS preflight for TURN credentials endpoint"""
-    origin = request.headers.get("origin", "*")
-
-    # Try to validate the session token and get allowed domains
-    allowed_origin = origin
-    try:
-        embed_session = await db_client.get_embed_session_by_token(session_token)
-        if embed_session:
-            embed_token = await db_client.get_embed_token_by_id(
-                embed_session.embed_token_id
-            )
-            if embed_token:
-                # Check if origin is in allowed domains (empty means allow all)
-                if validate_origin(origin, embed_token.allowed_domains or []):
-                    allowed_origin = origin
-                else:
-                    allowed_origin = ""
-    except Exception:
-        # On error, be permissive for OPTIONS
-        pass
-
-    return Response(
-        headers={
-            "Access-Control-Allow-Origin": allowed_origin,
-            "Access-Control-Allow-Methods": "GET, OPTIONS",
-            "Access-Control-Allow-Headers": "Content-Type",
-            "Access-Control-Max-Age": "86400",
-        }
-    )
-
-
-@router.options("/config/{token}")
-async def options_config(request: Request, token: str):
-    """Handle CORS preflight for config endpoint"""
-    # Get origin header
-    origin = request.headers.get("origin", "*")
-
-    # Try to validate the token and get allowed domains
-    allowed_origin = origin
-    try:
-        embed_token = await db_client.get_embed_token_by_token(token)
-        if embed_token and embed_token.is_active:
-            # Check if origin is in allowed domains
-            if validate_origin(origin, embed_token.allowed_domains or []):
-                allowed_origin = origin
-            else:
-                # If not allowed, don't include the origin
-                allowed_origin = ""
-    except Exception:
-        # On error, be permissive for OPTIONS
-        pass
-
-    return Response(
-        headers={
-            "Access-Control-Allow-Origin": allowed_origin,
-            "Access-Control-Allow-Methods": "GET, OPTIONS",
-            "Access-Control-Allow-Headers": "Content-Type",
-            "Access-Control-Max-Age": "86400",
-        }
+    """Fallback OPTIONS handler for TURN credentials endpoint."""
+    # Browser preflights are handled by PublicEmbedCORSMiddleware before global CORS.
+    return await _turn_credentials_preflight_response(
+        session_token, request.headers.get("origin", "")
    )
--- a/api/routes/s3_signed_url.py
+++ b/api/routes/s3_signed_url.py
@ -40,14 +40,22 @@ class PresignedUploadUrlResponse(BaseModel):
 router = APIRouter(prefix="/s3", tags=["s3"])


+ORG_SCOPED_STORAGE_PREFIXES = ("campaigns", "knowledge_base")
+
+
 def _extract_org_id_from_key(key: str) -> Optional[int]:
    """Try to extract an organization ID from a storage key.

-    Matches keys of the form ``{prefix}/{org_id}/...`` where *org_id* is a
-    positive integer.  Returns ``None`` when the pattern does not match.
+    Matches known org-scoped keys of the form ``{prefix}/{org_id}/...`` where
+    *org_id* is a positive integer. Returns ``None`` when the pattern does not
+    match.
    """
    parts = key.split("/")
-    if len(parts) >= 3 and parts[1].isdigit():
+    if (
+        len(parts) >= 3
+        and parts[0] in ORG_SCOPED_STORAGE_PREFIXES
+        and parts[1].isdigit()
+    ):
        return int(parts[1])
    return None

@ -58,15 +66,20 @@ def _extract_legacy_workflow_run_id(key: str) -> Optional[int]:
    Supports:
      - ``transcripts/{run_id}.txt``
      - ``recordings/{run_id}.wav``
+      - ``recordings/{run_id}/user.wav``
+      - ``recordings/{run_id}/bot.wav``

    Returns ``None`` when the key does not match a legacy pattern.
    """
    if key.startswith("transcripts/") and key.endswith(".txt"):
        run_id_str = key[len("transcripts/") : -4]
-    elif key.startswith("recordings/") and key.endswith(".wav"):
-        run_id_str = key[len("recordings/") : -4]
    else:
-        return None
+        recording_match = re.fullmatch(
+            r"recordings/(\d+)(?:\.wav|/(?:user|bot)\.wav)", key
+        )
+        if not recording_match:
+            return None
+        run_id_str = recording_match.group(1)

    return int(run_id_str) if run_id_str.isdigit() else None

@ -89,8 +102,13 @@ async def _validate_and_extract_workflow_run_id(
    """
    if key.startswith("transcripts/") and key.endswith(".txt"):
        run_id_str = key[len("transcripts/") : -4]  # strip prefix & suffix
-    elif key.startswith("recordings/") and key.endswith(".wav"):
-        run_id_str = key[len("recordings/") : -4]
+    elif key.startswith("recordings/"):
+        run_id = _extract_legacy_workflow_run_id(key)
+        if run_id is None:
+            raise HTTPException(
+                status_code=400, detail="Invalid workflow_run_id in key"
+            )
+        return run_id
    elif allow_special_paths and key.startswith("voicemail_detections/"):
        return None  # Skip validation for these paths
    else:
@ -159,9 +177,9 @@ async def get_signed_url(
    """Return a short-lived signed URL for a file stored on S3 / MinIO.

    Access Control:
-    * Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
-      authorized by matching the org_id against the requesting user's
-      organization.
+    * Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
+      ``knowledge_base/{org_id}/...``) are authorized by matching the org_id
+      against the requesting user's organization.
    * Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
      are authorized via the workflow run they belong to.
    * Superusers can request any key.
--- a/api/routes/telephony.py
+++ b/api/routes/telephony.py
@ -21,11 +21,11 @@ from starlette.websockets import WebSocketDisconnect

 from api.db import db_client
 from api.db.models import UserModel
-from api.enums import CallType, WorkflowRunState
+from api.enums import CallType, WorkflowRunMode, WorkflowRunState
 from api.errors.telephony_errors import TelephonyError
 from api.sdk_expose import sdk_expose
 from api.services.auth.depends import get_user
-from api.services.quota_service import check_dograh_quota_by_user_id
+from api.services.quota_service import authorize_workflow_run_start
 from api.services.telephony.call_transfer_manager import get_call_transfer_manager
 from api.services.telephony.factory import (
    get_all_telephony_providers,
@ -53,7 +53,7 @@ class InitiateCallRequest(BaseModel):
    workflow_run_id: int | None = None
    phone_number: str | None = None
    # Optional explicit telephony config to use for the test call. If omitted,
-    # falls back to the user's per-user default (when set), then the org default.
+    # falls back to the org default.
    telephony_configuration_id: int | None = None
    # Optional caller-ID phone number to dial out from. Must belong to the
    # resolved telephony configuration; otherwise the provider picks one.
@ -82,7 +82,12 @@ async def initiate_call(
    """Initiate a call using the configured telephony provider from web browser. This is
    supposed to be a test call method for the draft version of the agent."""

-    user_configuration = await db_client.get_user_configurations(user.id)
+    from api.services.organization_preferences import get_organization_preferences
+
+    preferences = await get_organization_preferences(
+        user.selected_organization_id,
+        db=db_client,
+    )

    # Resolve which telephony config to use: explicit request value, otherwise
    # the org's default outbound config.
@ -116,13 +121,12 @@ async def initiate_call(
            detail="telephony_not_configured",
        )

-    phone_number = request.phone_number or user_configuration.test_phone_number
+    phone_number = request.phone_number or preferences.test_phone_number

    if not phone_number:
        raise HTTPException(
            status_code=400,
-            detail="Phone number must be provided in request or set in user "
-            "configuration",
+            detail="Phone number must be provided in request or set in organization preferences",
        )

    workflow = await db_client.get_workflow(
@ -132,14 +136,6 @@ async def initiate_call(
        raise HTTPException(status_code=404, detail="Workflow not found")
    execution_user_id = _get_execution_user_id(workflow)

-    # Check Dograh quota before initiating the call (apply per-workflow
-    # model_overrides so the keys we will actually use are the ones checked).
-    quota_result = await check_dograh_quota_by_user_id(
-        execution_user_id, workflow_id=workflow.id
-    )
-    if not quota_result.has_quota:
-        raise HTTPException(status_code=402, detail=quota_result.error_message)
-
    # Determine the workflow run mode based on provider type
    workflow_run_mode = provider.PROVIDER_NAME

@ -182,6 +178,16 @@ async def initiate_call(
            )
        workflow_run_name = workflow_run.name

+    # Check Dograh quota after the run exists so hosted v2 can mint and store
+    # the MPS correlation id before initiating the call.
+    quota_result = await authorize_workflow_run_start(
+        workflow_id=workflow.id,
+        workflow_run_id=workflow_run_id,
+        actor_user=user,
+    )
+    if not quota_result.has_quota:
+        raise HTTPException(status_code=402, detail=quota_result.error_message)
+
    # Construct webhook URL based on provider type
    backend_endpoint, _ = await get_backend_endpoints()

@ -578,12 +584,36 @@ async def _handle_telephony_websocket(
            provider_type = workflow_run.initial_context.get("provider")
            logger.info(f"Extracted provider_type: {provider_type}")

+        if (
+            workflow_run.mode == WorkflowRunMode.SMALLWEBRTC.value
+            or provider_type == WorkflowRunMode.SMALLWEBRTC.value
+        ):
+            logger.warning(
+                f"SmallWebRTC workflow run {workflow_run_id} reached telephony "
+                f"websocket; mode={workflow_run.mode}, provider={provider_type}"
+            )
+            await websocket.close(
+                code=4400,
+                reason=(
+                    "smallwebrtc runs connect through the WebRTC signaling endpoint, "
+                    "not the telephony websocket"
+                ),
+            )
+            return
+
        if not provider_type:
            logger.error(
                f"No provider type found in workflow run {workflow_run_id}. "
                f"gathered_context: {workflow_run.gathered_context}, mode: {workflow_run.mode}"
            )
-            await websocket.close(code=4400, reason="Provider type not found")
+            await websocket.close(
+                code=4400,
+                reason=(
+                    f"No provider type found for workflow run {workflow_run_id} "
+                    f"(mode: {workflow_run.mode}); telephony websocket requires "
+                    "a telephony provider"
+                ),
+            )
            return

        logger.info(
@ -654,7 +684,7 @@ async def handle_inbound_run(request: Request):
            logger.error("Unable to detect provider for /inbound/run webhook")
            return generic_hangup_response()

-        normalized_data = normalize_webhook_data(provider_class, webhook_data)
+        normalized_data = normalize_webhook_data(provider_class, webhook_data, headers)
        logger.info(
            f"/inbound/run normalized data — provider={normalized_data.provider} "
            f"to={normalized_data.to_number} from={normalized_data.from_number}"
@ -735,19 +765,8 @@ async def handle_inbound_run(request: Request):
                TelephonyError.SIGNATURE_VALIDATION_FAILED
            )

-        # 4. Quota check (use the workflow's model_overrides if set).
-        quota_result = await check_dograh_quota_by_user_id(
-            user_id, workflow_id=workflow_id
-        )
-        if not quota_result.has_quota:
-            logger.warning(
-                f"User {user_id} has exceeded quota: {quota_result.error_message}"
-            )
-            return provider_class.generate_validation_error_response(
-                TelephonyError.QUOTA_EXCEEDED
-            )
-
-        # 5. Create workflow run + return provider-shaped response.
+        # 5. Create workflow run + authorize quota before returning provider
+        # stream instructions.
        workflow_run_id = await _create_inbound_workflow_run(
            workflow_id,
            user_id,
@ -756,6 +775,17 @@ async def handle_inbound_run(request: Request):
            telephony_configuration_id=telephony_configuration_id,
            from_phone_number_id=phone_row.id,
        )
+        quota_result = await authorize_workflow_run_start(
+            workflow_id=workflow_id,
+            workflow_run_id=workflow_run_id,
+        )
+        if not quota_result.has_quota:
+            logger.warning(
+                f"User {user_id} has exceeded quota: {quota_result.error_message}"
+            )
+            return provider_class.generate_validation_error_response(
+                TelephonyError.QUOTA_EXCEEDED
+            )

        backend_endpoint, wss_backend_endpoint = await get_backend_endpoints()
        websocket_url = (
@ -841,7 +871,7 @@ async def handle_inbound_telephony(
            logger.error("Unable to detect provider for webhook")
            return generic_hangup_response()

-        normalized_data = normalize_webhook_data(provider_class, webhook_data)
+        normalized_data = normalize_webhook_data(provider_class, webhook_data, headers)

        logger.info(f"Inbound call - Provider: {normalized_data.provider}")
        logger.info(f"Normalized data: {normalized_data}")
@ -870,20 +900,8 @@ async def handle_inbound_telephony(
            logger.error(f"Request validation failed: {error_type}")
            return provider_class.generate_validation_error_response(error_type)

-        # Check quota before processing (apply per-workflow model_overrides).
+        # Create workflow run.
        user_id = workflow_context["user_id"]
-        quota_result = await check_dograh_quota_by_user_id(
-            user_id, workflow_id=workflow_id
-        )
-        if not quota_result.has_quota:
-            logger.warning(
-                f"User {user_id} has exceeded quota for inbound calls: {quota_result.error_message}"
-            )
-            return provider_class.generate_validation_error_response(
-                TelephonyError.QUOTA_EXCEEDED
-            )
-
-        # Create workflow run
        workflow_run_id = await _create_inbound_workflow_run(
            workflow_id,
            workflow_context["user_id"],
@ -892,6 +910,17 @@ async def handle_inbound_telephony(
            telephony_configuration_id=workflow_context["telephony_configuration_id"],
            from_phone_number_id=workflow_context.get("from_phone_number_id"),
        )
+        quota_result = await authorize_workflow_run_start(
+            workflow_id=workflow_id,
+            workflow_run_id=workflow_run_id,
+        )
+        if not quota_result.has_quota:
+            logger.warning(
+                f"User {user_id} has exceeded quota for inbound calls: {quota_result.error_message}"
+            )
+            return provider_class.generate_validation_error_response(
+                TelephonyError.QUOTA_EXCEEDED
+            )

        # Generate response URLs
        backend_endpoint, wss_backend_endpoint = await get_backend_endpoints()
--- a/api/routes/tool.py
+++ b/api/routes/tool.py
@ -1,303 +1,68 @@
 """API routes for managing tools."""

-import asyncio
-import re
-from datetime import datetime
-from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+from typing import List, Optional

 from fastapi import APIRouter, Depends, HTTPException
-from loguru import logger
-from pydantic import BaseModel, Field, field_validator

 from api.db import db_client
 from api.db.models import UserModel
-from api.enums import PostHogEvent, ToolCategory, ToolStatus
+from api.enums import ToolCategory, ToolStatus
+from api.schemas.tool import (
+    CalculatorToolDefinition,
+    CreatedByResponse,
+    CreateToolRequest,
+    EndCallConfig,
+    EndCallToolDefinition,
+    HttpApiConfig,
+    HttpApiToolDefinition,
+    McpRefreshResponse,
+    McpToolConfig,
+    McpToolDefinition,
+    PresetToolParameter,
+    ToolDefinition,
+    ToolParameter,
+    ToolResponse,
+    TransferCallConfig,
+    TransferCallToolDefinition,
+    UpdateToolRequest,
+)
 from api.sdk_expose import sdk_expose
 from api.services.auth.depends import get_user
-from api.services.posthog_client import capture_event
-from api.services.workflow.mcp_tool_session import discover_mcp_tools
-from api.services.workflow.tools.mcp_tool import (
-    McpDefinitionError,
-    validate_mcp_definition,
+from api.services.tool_management import (
+    ToolManagementError,
+    build_tool_response,
+    create_tool_for_user,
+    refresh_mcp_tool_for_user,
+    validate_tool_credential_references,
 )
-from api.services.workflow.tools.mcp_tool import (
-    McpToolConfig as SharedMcpToolConfig,
-)
-from api.services.workflow.tools.mcp_tool import (
-    McpToolDefinition as SharedMcpToolDefinition,
+from api.services.tool_management import (
+    populate_discovered_tools as _populate_discovered_tools,
 )

 router = APIRouter(prefix="/tools")

-McpToolConfig = SharedMcpToolConfig
-McpToolDefinition = SharedMcpToolDefinition
-
-
-# Request/Response schemas
-class ToolParameter(BaseModel):
-    """A parameter that the tool accepts."""
-
-    name: str = Field(description="Parameter name (used as key in request body)")
-    type: str = Field(description="Parameter type: string, number, or boolean")
-    description: str = Field(description="Description of what this parameter is for")
-    required: bool = Field(
-        default=True, description="Whether this parameter is required"
-    )
-
-
-class PresetToolParameter(BaseModel):
-    """A parameter injected by Dograh at runtime."""
-
-    name: str = Field(description="Parameter name (used as key in request body)")
-    type: str = Field(description="Parameter type: string, number, or boolean")
-    value_template: str = Field(
-        description="Fixed value or template, e.g. {{initial_context.phone_number}}"
-    )
-    required: bool = Field(
-        default=True,
-        description="Whether the parameter must resolve to a non-empty value",
-    )
-
-
-class HttpApiConfig(BaseModel):
-    """Configuration for HTTP API tools."""
-
-    method: str = Field(description="HTTP method (GET, POST, PUT, PATCH, DELETE)")
-    url: str = Field(description="Target URL")
-    headers: Optional[Dict[str, str]] = Field(
-        default=None, description="Static headers to include"
-    )
-    credential_uuid: Optional[str] = Field(
-        default=None, description="Reference to ExternalCredentialModel for auth"
-    )
-    parameters: Optional[List[ToolParameter]] = Field(
-        default=None, description="Parameters that the tool accepts from LLM"
-    )
-    preset_parameters: Optional[List[PresetToolParameter]] = Field(
-        default=None,
-        description="Parameters injected by Dograh from fixed values or workflow context templates",
-    )
-    timeout_ms: Optional[int] = Field(
-        default=5000, description="Request timeout in milliseconds"
-    )
-    customMessage: Optional[str] = Field(
-        default=None, description="Custom message to play after tool execution"
-    )
-    customMessageType: Optional[Literal["text", "audio"]] = Field(
-        default=None, description="Type of custom message: text or audio"
-    )
-    customMessageRecordingId: Optional[str] = Field(
-        default=None, description="Recording ID for audio custom message"
-    )
-
-
-class EndCallConfig(BaseModel):
-    """Configuration for End Call tools."""
-
-    messageType: Literal["none", "custom", "audio"] = Field(
-        default="none", description="Type of goodbye message"
-    )
-    customMessage: Optional[str] = Field(
-        default=None, description="Custom message to play before ending the call"
-    )
-    audioRecordingId: Optional[str] = Field(
-        default=None, description="Recording ID for audio goodbye message"
-    )
-    endCallReason: bool = Field(
-        default=False,
-        description="When enabled, LLM must provide a reason for ending the call. "
-        "The reason is set as call disposition and added to call tags.",
-    )
-    endCallReasonDescription: Optional[str] = Field(
-        default=None,
-        description="Description shown to the LLM for the reason parameter. "
-        "Used only when endCallReason is enabled.",
-    )
-
-
-class TransferCallConfig(BaseModel):
-    """Configuration for Transfer Call tools."""
-
-    destination: str = Field(
-        description="Phone number or SIP endpoint to transfer the call to (E.164 format e.g., +1234567890, or SIP endpoint e.g., PJSIP/1234)"
-    )
-    messageType: Literal["none", "custom", "audio"] = Field(
-        default="none", description="Type of message to play before transfer"
-    )
-    customMessage: Optional[str] = Field(
-        default=None, description="Custom message to play before transferring the call"
-    )
-    audioRecordingId: Optional[str] = Field(
-        default=None, description="Recording ID for audio message before transfer"
-    )
-    timeout: int = Field(
-        default=30,
-        ge=5,
-        le=120,
-        description="Maximum time in seconds to wait for destination to answer (5-120 seconds)",
-    )
-
-    @field_validator("destination")
-    @classmethod
-    def validate_destination(cls, v: str) -> str:
-        """Validate that destination is a valid E.164 phone number or SIP endpoint."""
-        # Allow empty string for initial creation (like HTTP API tools with empty URL)
-        if not v.strip():
-            return v
-
-        # E.164 format: +[1-9]\d{1,14}
-        e164_pattern = r"^\+[1-9]\d{1,14}$"
-
-        # SIP endpoint format: PJSIP/extension or SIP/extension
-        sip_pattern = r"^(PJSIP|SIP)/[\w\-\.@]+$"
-
-        is_valid_e164 = re.match(e164_pattern, v)
-        is_valid_sip = re.match(sip_pattern, v, re.IGNORECASE)
-
-        if not (is_valid_e164 or is_valid_sip):
-            raise ValueError(
-                "Destination must be a valid E.164 phone number (e.g., +1234567890) or SIP endpoint (e.g., PJSIP/1234)"
-            )
-        return v
-
-
-class HttpApiToolDefinition(BaseModel):
-    """Tool definition for HTTP API tools."""
-
-    schema_version: int = Field(default=1, description="Schema version")
-    type: Literal["http_api"] = Field(description="Tool type")
-    config: HttpApiConfig = Field(description="HTTP API configuration")
-
-
-class EndCallToolDefinition(BaseModel):
-    """Tool definition for End Call tools."""
-
-    schema_version: int = Field(default=1, description="Schema version")
-    type: Literal["end_call"] = Field(description="Tool type")
-    config: EndCallConfig = Field(description="End Call configuration")
-
-
-class TransferCallToolDefinition(BaseModel):
-    """Tool definition for Transfer Call tools."""
-
-    schema_version: int = Field(default=1, description="Schema version")
-    type: Literal["transfer_call"] = Field(description="Tool type")
-    config: TransferCallConfig = Field(description="Transfer Call configuration")
-
-
-class CalculatorToolDefinition(BaseModel):
-    """Tool definition for Calculator tools (no configuration needed)."""
-
-    schema_version: int = Field(default=1, description="Schema version")
-    type: Literal["calculator"] = Field(description="Tool type")
-
-
-# Union type for tool definitions - Pydantic will discriminate based on 'type' field
-ToolDefinition = Annotated[
-    Union[
-        HttpApiToolDefinition,
-        EndCallToolDefinition,
-        TransferCallToolDefinition,
-        CalculatorToolDefinition,
-        McpToolDefinition,
-    ],
-    Field(discriminator="type"),
+__all__ = [  # Public names of this module; each one is imported above rather than defined here.
+    "CalculatorToolDefinition",
+    "CreateToolRequest",
+    "CreatedByResponse",
+    "EndCallConfig",
+    "EndCallToolDefinition",
+    "HttpApiConfig",
+    "HttpApiToolDefinition",
+    "McpRefreshResponse",
+    "McpToolConfig",
+    "McpToolDefinition",
+    "PresetToolParameter",
+    "ToolDefinition",
+    "ToolParameter",
+    "ToolResponse",
+    "TransferCallConfig",
+    "TransferCallToolDefinition",
+    "UpdateToolRequest",
+    "_populate_discovered_tools",  # local alias of api.services.tool_management.populate_discovered_tools
 ]


-class CreateToolRequest(BaseModel):
-    """Request schema for creating a tool."""
-
-    name: str = Field(max_length=255)
-    description: Optional[str] = None
-    category: str = Field(default=ToolCategory.HTTP_API.value)
-    icon: Optional[str] = Field(default="globe", max_length=50)
-    icon_color: Optional[str] = Field(default="#3B82F6", max_length=7)
-    definition: ToolDefinition
-
-    @field_validator("category")
-    @classmethod
-    def validate_category(cls, v: str) -> str:
-        """Validate that category is a valid ToolCategory value."""
-        valid_categories = [c.value for c in ToolCategory]
-        if v not in valid_categories:
-            raise ValueError(
-                f"Invalid category '{v}'. Must be one of: {', '.join(valid_categories)}"
-            )
-        return v
-
-
-class UpdateToolRequest(BaseModel):
-    """Request schema for updating a tool."""
-
-    name: Optional[str] = Field(default=None, max_length=255)
-    description: Optional[str] = None
-    icon: Optional[str] = Field(default=None, max_length=50)
-    icon_color: Optional[str] = Field(default=None, max_length=7)
-    definition: Optional[ToolDefinition] = None
-    status: Optional[str] = None
-
-
-class CreatedByResponse(BaseModel):
-    """Response schema for the user who created a tool."""
-
-    id: int
-    provider_id: str
-
-
-class ToolResponse(BaseModel):
-    """Response schema for a tool."""
-
-    id: int
-    tool_uuid: str
-    name: str
-    description: Optional[str]
-    category: str
-    icon: Optional[str]
-    icon_color: Optional[str]
-    status: str
-    definition: Dict[str, Any]
-    created_at: datetime
-    updated_at: Optional[datetime]
-    created_by: Optional[CreatedByResponse] = None
-
-    class Config:
-        from_attributes = True
-
-
-class McpRefreshResponse(BaseModel):
-    """Result of re-discovering an MCP server's tool catalog."""
-
-    tool_uuid: str
-    discovered_tools: list = Field(default_factory=list)
-    error: Optional[str] = None
-
-
-def build_tool_response(tool, include_created_by: bool = False) -> ToolResponse:
-    """Build a response from a tool model."""
-    created_by = None
-    if include_created_by and tool.created_by_user:
-        created_by = CreatedByResponse(
-            id=tool.created_by_user.id,
-            provider_id=tool.created_by_user.provider_id,
-        )
-
-    return ToolResponse(
-        id=tool.id,
-        tool_uuid=tool.tool_uuid,
-        name=tool.name,
-        description=tool.description,
-        category=tool.category,
-        icon=tool.icon,
-        icon_color=tool.icon_color,
-        status=tool.status,
-        definition=tool.definition,
-        created_at=tool.created_at,
-        updated_at=tool.updated_at,
-        created_by=created_by,
-    )
-
-
 def validate_category(category: str) -> None:
    """Validate that the category is valid."""
    valid_categories = [c.value for c in ToolCategory]
@ -361,53 +126,13 @@ async def list_tools(
    return [build_tool_response(tool) for tool in tools]


-async def _fetch_credential(credential_uuid: Optional[str], organization_id: int):
-    """Best-effort credential lookup for MCP auth. A missing/failed credential
-    degrades to ``None`` (unauthenticated) rather than failing the request."""
-    if not credential_uuid:
-        return None
-    try:
-        return await db_client.get_credential_by_uuid(credential_uuid, organization_id)
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"MCP: credential fetch failed: {e}")
-        return None
-
-
-async def _populate_discovered_tools(definition: dict, *, organization_id: int) -> dict:
-    """Best-effort: for an MCP definition, connect to the server, list its
-    tools, and overwrite ``config.discovered_tools``. Never raises and never
-    blocks tool save — a dead server yields ``discovered_tools: []``. Non-MCP
-    definitions pass through untouched."""
-    if not isinstance(definition, dict) or definition.get("type") != "mcp":
-        return definition
-    try:
-        cfg = validate_mcp_definition(definition)
-    except McpDefinitionError:
-        return definition
-
-    credential = await _fetch_credential(cfg.get("credential_uuid"), organization_id)
-
-    # Run discovery in an isolated asyncio task so an anyio cancel-scope
-    # CancelledError doesn't bleed into the parent task and corrupt the
-    # subsequent DB write. _run() never raises (degrades to []).
-    async def _run() -> list:
-        try:
-            return await discover_mcp_tools(
-                url=cfg["url"],
-                credential=credential,
-                timeout_secs=cfg["timeout_secs"],
-                sse_read_timeout_secs=cfg["sse_read_timeout_secs"],
-            )
-        except BaseException as e:  # noqa: BLE001
-            logger.warning(f"MCP discovery failed; caching empty list: {e}")
-            return []
-
-    discovered = await asyncio.ensure_future(_run())
-    definition["config"]["discovered_tools"] = discovered
-    return definition
-
-
-@router.post("/")
+@router.post(
+    "/",
+    **sdk_expose(
+        method="create_tool",
+        description="Create a reusable tool for the authenticated organization.",
+    ),
+)
 async def create_tool(
    request: CreateToolRequest,
    user: UserModel = Depends(get_user),
@ -421,40 +146,10 @@ async def create_tool(
    Returns:
        The created tool
    """
-    if not user.selected_organization_id:
-        raise HTTPException(
-            status_code=400, detail="No organization selected for the user"
-        )
-
-    validate_category(request.category)
-
-    definition = await _populate_discovered_tools(
-        request.definition.model_dump(),
-        organization_id=user.selected_organization_id,
-    )
-
-    tool = await db_client.create_tool(
-        organization_id=user.selected_organization_id,
-        user_id=user.id,
-        name=request.name,
-        definition=definition,
-        category=request.category,
-        description=request.description,
-        icon=request.icon,
-        icon_color=request.icon_color,
-    )
-
-    capture_event(
-        distinct_id=str(user.provider_id),
-        event=PostHogEvent.TOOL_CREATED,
-        properties={
-            "tool_name": request.name,
-            "tool_category": request.category,
-            "organization_id": user.selected_organization_id,
-        },
-    )
-
-    return build_tool_response(tool)
+    try:
+        return await create_tool_for_user(request, user, source="api")
+    except ToolManagementError as e:
+        raise HTTPException(status_code=e.status_code, detail=e.message) from e


@router.get("/{tool_uuid}")
@ -494,57 +189,10 @@ async def refresh_mcp_tools(
    """Re-discover an MCP tool's server catalog and overwrite the cached
    ``definition.config.discovered_tools``. Server down → 200 with error
    (cache not overwritten on transient failure)."""
-    if not user.selected_organization_id:
-        raise HTTPException(
-            status_code=400, detail="No organization selected for the user"
-        )
-
-    tool = await db_client.get_tool_by_uuid(
-        tool_uuid, user.selected_organization_id, include_archived=True
-    )
-    if not tool:
-        raise HTTPException(status_code=404, detail="Tool not found")
-    if tool.category != ToolCategory.MCP.value:
-        raise HTTPException(status_code=400, detail="Tool is not an MCP tool")
-
    try:
-        cfg = validate_mcp_definition(tool.definition)
-    except McpDefinitionError as e:
-        raise HTTPException(status_code=400, detail=f"Invalid MCP definition: {e}")
-
-    credential = await _fetch_credential(
-        cfg.get("credential_uuid"), user.selected_organization_id
-    )
-
-    try:
-        discovered = await discover_mcp_tools(
-            url=cfg["url"],
-            credential=credential,
-            timeout_secs=cfg["timeout_secs"],
-            sse_read_timeout_secs=cfg["sse_read_timeout_secs"],
-        )
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"MCP refresh discovery failed: {e}")
-        discovered = []
-
-    if not discovered:
-        error = (
-            f"Could not reach the MCP server at {cfg['url']} "
-            f"(or it exposes no tools). Previously cached list retained."
-        )
-        # Do NOT clobber a previously-good cache with [] on a transient outage.
-        return McpRefreshResponse(tool_uuid=tool_uuid, discovered_tools=[], error=error)
-
-    new_def = dict(tool.definition or {})
-    new_def["config"] = {**new_def.get("config", {}), "discovered_tools": discovered}
-    await db_client.update_tool(
-        tool_uuid=tool_uuid,
-        organization_id=user.selected_organization_id,
-        definition=new_def,
-    )
-    return McpRefreshResponse(
-        tool_uuid=tool_uuid, discovered_tools=discovered, error=None
-    )
+        return await refresh_mcp_tool_for_user(tool_uuid, user)
+    except ToolManagementError as e:
+        raise HTTPException(status_code=e.status_code, detail=e.message) from e


@router.put("/{tool_uuid}")
@ -571,14 +219,20 @@ async def update_tool(
    if request.status:
        validate_status(request.status)

-    definition = (
-        await _populate_discovered_tools(
-            request.definition.model_dump(),
-            organization_id=user.selected_organization_id,
-        )
-        if request.definition
-        else None
-    )
+    definition = None
+    if request.definition:
+        definition = request.definition.model_dump()
+        try:
+            await validate_tool_credential_references(
+                definition,
+                organization_id=user.selected_organization_id,
+            )
+            definition = await _populate_discovered_tools(
+                definition,
+                organization_id=user.selected_organization_id,
+            )
+        except ToolManagementError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.message) from e

    tool = await db_client.update_tool(
        tool_uuid=tool_uuid,
--- a/api/routes/user.py
+++ b/api/routes/user.py
@ -9,7 +9,11 @@ from api.db import db_client
 from api.db.models import (
    UserModel,
 )
+from api.schemas.onboarding_state import OnboardingState, OnboardingStateUpdate
 from api.services.auth.depends import get_user
+from api.services.configuration.ai_model_configuration import (
+    get_resolved_ai_model_configuration,
+)
 from api.services.configuration.check_validity import (
    APIKeyStatusResponse,
    UserConfigurationValidator,
@ -19,6 +23,14 @@ from api.services.configuration.masking import check_for_masked_keys, mask_user_
 from api.services.configuration.merge import merge_user_configurations
 from api.services.configuration.registry import REGISTRY, ServiceType
 from api.services.mps_service_key_client import mps_service_key_client
+from api.services.organization_preferences import (
+    get_organization_preferences,
+    upsert_organization_preferences,
+)
+from api.services.user_onboarding import (
+    get_onboarding_state,
+    update_onboarding_state,
+)

 router = APIRouter(prefix="/user")

@ -91,8 +103,17 @@ class UserConfigurationRequestResponseSchema(BaseModel):
 async def get_user_configurations(
    user: UserModel = Depends(get_user),
 ) -> UserConfigurationRequestResponseSchema:
-    user_configurations = await db_client.get_user_configurations(user.id)
-    masked_config = mask_user_config(user_configurations)
+    resolved_config = await get_resolved_ai_model_configuration(
+        user_id=user.id,
+        organization_id=user.selected_organization_id,
+    )
+    masked_config = mask_user_config(resolved_config.effective)
+    if user.selected_organization_id:
+        preferences = await get_organization_preferences(user.selected_organization_id)
+        if preferences.test_phone_number is not None:
+            masked_config["test_phone_number"] = preferences.test_phone_number
+        if preferences.timezone is not None:
+            masked_config["timezone"] = preferences.timezone

    # Add organization pricing info if available
    if user.selected_organization_id:
@ -118,34 +139,61 @@ async def update_user_configurations(

    # Remove organization_pricing from incoming dict as it's read-only
    incoming_dict.pop("organization_pricing", None)
+    preferences_update = {
+        key: incoming_dict.pop(key)
+        for key in ("test_phone_number", "timezone")
+        if key in incoming_dict
+    }

-    # Merge via helper
-    try:
-        user_configurations = merge_user_configurations(existing_config, incoming_dict)
-    except ValidationError as e:
-        raise HTTPException(status_code=422, detail=str(e))
+    if incoming_dict:
+        # Merge via helper
+        try:
+            user_configurations = merge_user_configurations(
+                existing_config, incoming_dict
+            )
+        except ValidationError as e:
+            raise HTTPException(status_code=422, detail=str(e))

-    try:
-        check_for_masked_keys(user_configurations)
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
+        try:
+            check_for_masked_keys(user_configurations)
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=str(e))

-    try:
-        validator = UserConfigurationValidator()
-        await validator.validate(
-            user_configurations,
-            organization_id=user.selected_organization_id,
-            created_by=user.provider_id,
+        try:
+            validator = UserConfigurationValidator()
+            await validator.validate(
+                user_configurations,
+                organization_id=user.selected_organization_id,
+                created_by=user.provider_id,
+            )
+        except ValueError as e:
+            raise HTTPException(status_code=422, detail=e.args[0])
+
+        user_configurations = await db_client.update_user_configuration(
+            user.id, user_configurations
        )
-    except ValueError as e:
-        raise HTTPException(status_code=422, detail=e.args[0])
+    else:
+        user_configurations = existing_config

-    user_configurations = await db_client.update_user_configuration(
-        user.id, user_configurations
-    )
+    if user.selected_organization_id and preferences_update:
+        preferences = await get_organization_preferences(user.selected_organization_id)
+        if "test_phone_number" in preferences_update:
+            preferences.test_phone_number = preferences_update["test_phone_number"]
+        if "timezone" in preferences_update:
+            preferences.timezone = preferences_update["timezone"]
+        await upsert_organization_preferences(
+            user.selected_organization_id,
+            preferences,
+        )

    # Return masked version of updated config
    masked_config = mask_user_config(user_configurations)
+    if user.selected_organization_id:
+        preferences = await get_organization_preferences(user.selected_organization_id)
+        if preferences.test_phone_number is not None:
+            masked_config["test_phone_number"] = preferences.test_phone_number
+        if preferences.timezone is not None:
+            masked_config["timezone"] = preferences.timezone

    # Add organization pricing info if available
    if user.selected_organization_id:
@ -160,12 +208,31 @@ async def update_user_configurations(
    return masked_config


+@router.get("/onboarding-state")  # path is relative to this router's "/user" prefix
+async def get_user_onboarding_state(
+    user: UserModel = Depends(get_user),  # resolved by the get_user dependency
+) -> OnboardingState:
+    return await get_onboarding_state(user.id)  # delegates to the user_onboarding service, keyed by user id
+
+
+@router.put("/onboarding-state")  # path is relative to this router's "/user" prefix
+async def update_user_onboarding_state(
+    request: OnboardingStateUpdate,  # request body with the requested changes
+    user: UserModel = Depends(get_user),  # resolved by the get_user dependency
+) -> OnboardingState:
+    return await update_onboarding_state(user.id, request)  # delegates to the service; its result is returned as-is
+
+
@router.get("/configurations/user/validate")
 async def validate_user_configurations(
    validity_ttl_seconds: int = Query(default=60, ge=0, le=86400),
    user: UserModel = Depends(get_user),
 ) -> APIKeyStatusResponse:
-    configurations = await db_client.get_user_configurations(user.id)
+    resolved_config = await get_resolved_ai_model_configuration(
+        user_id=user.id,
+        organization_id=user.selected_organization_id,
+    )
+    configurations = resolved_config.effective

    if (
        configurations.last_validated_at
@ -321,9 +388,18 @@ class VoiceInfo(BaseModel):
    preview_url: Optional[str] = None


+class VoiceFacets(BaseModel):
+    """Distinct selector values across a provider's full voice catalog."""
+
+    genders: List[str] = []  # NOTE(review): presumably the values usable as get_voices' `gender` filter; confirm
+    accents: List[str] = []  # NOTE(review): presumably the values usable as the `accent` filter; confirm
+    languages: List[str] = []  # NOTE(review): presumably the values usable as the `language` filter; confirm
+
+
 class VoicesResponse(BaseModel):  # Response model returned by the get_voices route.
    provider: str
    voices: List[VoiceInfo]
+    facets: Optional[VoiceFacets] = None  # get_voices passes result.get("facets"), so None when that key is absent


@router.get("/configurations/voices/{provider}")
@ -331,6 +407,9 @@ async def get_voices(
    provider: TTSProvider,
    model: Optional[str] = None,
    language: Optional[str] = None,
+    q: Optional[str] = None,
+    gender: Optional[str] = None,
+    accent: Optional[str] = None,
    user: UserModel = Depends(get_user),
 ) -> VoicesResponse:
    """Get available voices for a TTS provider."""
@ -339,12 +418,16 @@ async def get_voices(
            provider=provider,
            model=model,
            language=language,
+            q=q,
+            gender=gender,
+            accent=accent,
            organization_id=user.selected_organization_id,
            created_by=user.provider_id,
        )
        return VoicesResponse(
            provider=result.get("provider", provider),
            voices=[VoiceInfo(**voice) for voice in result.get("voices", [])],
+            facets=result.get("facets"),
        )
    except Exception as e:
        logger.error(f"Failed to fetch voices for {provider}: {e}")
--- a/api/routes/webrtc_signaling.py
+++ b/api/routes/webrtc_signaling.py
@ -19,7 +19,7 @@ import ipaddress
 import os
 from datetime import UTC, datetime
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Set

 from aiortc import RTCIceServer
 from aiortc.sdp import candidate_from_sdp
@ -45,7 +45,7 @@ from api.services.pipecat.ws_sender_registry import (
    register_ws_sender,
    unregister_ws_sender,
 )
-from api.services.quota_service import check_dograh_quota
+from api.services.quota_service import authorize_workflow_run_start

 router = APIRouter(prefix="/ws")

@ -246,6 +246,74 @@ class SignalingManager:
    def __init__(self):
        self._connections: Dict[str, WebSocket] = {}  # connection key -> signaling WebSocket
        self._peer_connections: Dict[str, SmallWebRTCConnection] = {}  # pc_id -> peer connection
+        self._connection_peer_ids: Dict[str, Set[str]] = {}  # connection key -> pc_ids it owns
+        self._peer_connection_owners: Dict[str, str] = {}  # pc_id -> owning connection key
+
+    def _track_peer_connection(  # Register pc under pc_id and link it to its owning connection.
+        self, connection_id: str, pc_id: str, pc: SmallWebRTCConnection
+    ) -> None:
+        self._peer_connections[pc_id] = pc  # overwrites any existing entry for pc_id
+        self._peer_connection_owners[pc_id] = connection_id  # pc_id -> owner
+        self._connection_peer_ids.setdefault(connection_id, set()).add(pc_id)  # owner -> pc_ids
+
+    def _forget_peer_connection(self, pc_id: str) -> Optional[str]:  # Untrack pc_id; returns its owner or None.
+        connection_id = self._peer_connection_owners.pop(pc_id, None)  # None if pc_id was never tracked
+        self._peer_connections.pop(pc_id, None)  # only drops the reference; the pc is not disconnected here
+
+        if connection_id:
+            peer_ids = self._connection_peer_ids.get(connection_id)
+            if peer_ids is not None:
+                peer_ids.discard(pc_id)
+                if not peer_ids:  # that was the owner's last pc_id, so drop the empty set too
+                    self._connection_peer_ids.pop(connection_id, None)
+
+        return connection_id
+
+    async def _send_json_if_connected(  # Best-effort JSON send over the signaling WebSocket.
+        self, websocket: WebSocket, message: dict
+    ) -> bool:  # True only when send_json completed; False otherwise
+        if websocket.application_state != WebSocketState.CONNECTED:
+            return False  # not connected: nothing is sent
+
+        try:
+            await websocket.send_json(message)
+            return True
+        except Exception as e:  # send failures are logged at debug level and reported as False
+            logger.debug(f"Failed to send signaling WebSocket message: {e}")
+            return False
+
+    async def _close_websocket_if_connected(  # Best-effort close of the signaling WebSocket.
+        self, websocket: WebSocket, code: int = 1000, reason: str = ""
+    ) -> None:
+        if websocket.application_state != WebSocketState.CONNECTED:
+            return  # not connected: no close is attempted
+
+        try:
+            await websocket.close(code=code, reason=reason)
+        except Exception as e:  # close failures are logged at debug level and suppressed
+            logger.debug(f"Failed to close signaling WebSocket: {e}")
+
+    async def _notify_call_ended_and_close_websocket(  # Send a "call-ended" message, then close the socket.
+        self,
+        websocket: WebSocket,
+        workflow_run_id: int,
+        pc_id: str,
+        reason: str,  # goes into the message payload, not into the close frame
+    ) -> None:
+        await self._send_json_if_connected(  # return value ignored; the close below is attempted either way
+            websocket,
+            {
+                "type": "call-ended",
+                "payload": {
+                    "workflow_run_id": workflow_run_id,
+                    "pc_id": pc_id,
+                    "reason": reason,
+                },
+            },
+        )
+        await self._close_websocket_if_connected(
+            websocket, code=1000, reason="call ended"  # fixed close code and close reason
+        )

    async def handle_websocket(
        self,
@ -257,35 +325,51 @@ class SignalingManager:
        """Handle WebSocket connection for signaling."""
        await websocket.accept()
        connection_id = f"{workflow_id}:{workflow_run_id}:{user.id}"
-        self._connections[connection_id] = websocket
+        connection_key = f"{connection_id}:{id(websocket)}"
+        self._connections[connection_key] = websocket

        try:
            while True:
                message = await websocket.receive_json()
                await self._handle_message(
-                    websocket, message, workflow_id, workflow_run_id, user
+                    websocket,
+                    message,
+                    workflow_id,
+                    workflow_run_id,
+                    user,
+                    connection_key,
                )
        except WebSocketDisconnect:
            logger.info(f"WebSocket disconnected for {connection_id}")
        except Exception as e:
-            logger.error(f"WebSocket error for {connection_id}: {e}")
+            if websocket.application_state == WebSocketState.DISCONNECTED:
+                logger.info(f"WebSocket disconnected for {connection_id}")
+            else:
+                logger.error(f"WebSocket error for {connection_id}: {e}")
        finally:
            # Cleanup
-            self._connections.pop(connection_id, None)
+            self._connections.pop(connection_key, None)
+            peer_ids = list(self._connection_peer_ids.pop(connection_key, set()))

            # Unregister WebSocket sender for real-time feedback
            unregister_ws_sender(workflow_run_id)

-            # Clean up all peer connections for this workflow run
+            # Clean up peer connections owned by this WebSocket.
            # Note: In a WebSocket-based signaling approach (vs HTTP PATCH),
            # we maintain our own connection map instead of relying on
            # SmallWebRTCRequestHandler's _pcs_map. This is suitable for
            # multi-worker FastAPI deployments where state cannot be shared.
-            for pc_id in list(self._peer_connections.keys()):
+            for pc_id in peer_ids:
+                self._peer_connection_owners.pop(pc_id, None)
                pc = self._peer_connections.pop(pc_id, None)
                if pc:
-                    await pc.disconnect()
-                    logger.debug(f"Disconnected peer connection: {pc_id}")
+                    try:
+                        await pc.disconnect()
+                        logger.debug(f"Disconnected peer connection: {pc_id}")
+                    except Exception as e:
+                        logger.debug(
+                            f"Failed to disconnect peer connection {pc_id}: {e}"
+                        )

    async def _handle_message(
        self,
@ -294,17 +378,20 @@ class SignalingManager:
        workflow_id: int,
        workflow_run_id: int,
        user: UserModel,
+        connection_key: str,
    ):
        """Handle incoming WebSocket messages."""
        msg_type = message.get("type")
        payload = message.get("payload", {})

        if msg_type == "offer":
-            await self._handle_offer(ws, payload, workflow_id, workflow_run_id, user)
+            await self._handle_offer(
+                ws, payload, workflow_id, workflow_run_id, user, connection_key
+            )
        elif msg_type == "ice-candidate":
-            await self._handle_ice_candidate(ws, payload, workflow_run_id)
+            await self._handle_ice_candidate(payload, connection_key)
        elif msg_type == "renegotiate":
-            await self._handle_renegotiation(ws, payload, workflow_id, workflow_run_id)
+            await self._handle_renegotiation(ws, payload, connection_key)

    async def _handle_offer(
        self,
@ -313,6 +400,7 @@ class SignalingManager:
        workflow_id: int,
        workflow_run_id: int,
        user: UserModel,
+        connection_key: str,
    ):
        """Handle offer message and create answer with ICE trickling."""
        pc_id = payload.get("pc_id")
@ -320,6 +408,15 @@ class SignalingManager:
        type_ = payload.get("type")
        call_context_vars = payload.get("call_context_vars", {})

+        if not pc_id or not sdp or not type_:
+            await ws.send_json(
+                {
+                    "type": "error",
+                    "payload": {"message": "Missing offer fields"},
+                }
+            )
+            return
+
        # Set run context for logging and tracing. org_id must be set before
        # pc.initialize() so that aiortc's internal tasks inherit it.
        set_current_run_id(workflow_run_id)
@ -329,7 +426,11 @@ class SignalingManager:

        # Check Dograh quota before initiating the call (apply per-workflow
        # model_overrides so we evaluate the keys this workflow will use).
-        quota_result = await check_dograh_quota(user, workflow_id=workflow_id)
+        quota_result = await authorize_workflow_run_start(
+            workflow_id=workflow_id,
+            workflow_run_id=workflow_run_id,
+            actor_user=user,
+        )
        if not quota_result.has_quota:
            # Send error response for quota issues
            await ws.send_json(
@ -343,7 +444,16 @@ class SignalingManager:
            )
            return

-        if pc_id and pc_id in self._peer_connections:
+        if pc_id in self._peer_connections:
+            if self._peer_connection_owners.get(pc_id) != connection_key:
+                await ws.send_json(
+                    {
+                        "type": "error",
+                        "payload": {"message": "Peer connection already owned"},
+                    }
+                )
+                return
+
            # Reuse existing connection
            logger.info(f"Reusing existing connection for pc_id: {pc_id}")
            pc = self._peer_connections[pc_id]
@ -375,7 +485,7 @@ class SignalingManager:
            await pc.initialize(sdp=sdp, type=type_)

            # Store peer connection using client's pc_id
-            self._peer_connections[pc_id] = pc
+            self._track_peer_connection(connection_key, pc_id, pc)

            # Register WebSocket sender for real-time feedback
            async def ws_sender(message: dict):
@ -388,7 +498,16 @@ class SignalingManager:
            @pc.event_handler("closed")
            async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
                logger.info(f"PeerConnection closed: {webrtc_connection.pc_id}")
-                self._peer_connections.pop(webrtc_connection.pc_id, None)
+                owner_connection_id = self._forget_peer_connection(
+                    webrtc_connection.pc_id
+                )
+                if owner_connection_id == connection_key:
+                    await self._notify_call_ended_and_close_websocket(
+                        ws,
+                        workflow_run_id,
+                        webrtc_connection.pc_id,
+                        reason="peer_connection_closed",
+                    )

            # Start pipeline in background
            asyncio.create_task(
@ -417,9 +536,7 @@ class SignalingManager:
                }
            )

-    async def _handle_ice_candidate(
-        self, ws: WebSocket, payload: dict, workflow_run_id: int
-    ):
+    async def _handle_ice_candidate(self, payload: dict, connection_key: str):
        """Handle incoming ICE candidate from client.

        Uses SmallWebRTC's native ICE trickling support via add_ice_candidate().
@ -438,6 +555,9 @@ class SignalingManager:
        if not pc:
            logger.warning(f"No peer connection found for pc_id: {pc_id}")
            return
+        if self._peer_connection_owners.get(pc_id) != connection_key:
+            logger.warning(f"Ignoring ICE candidate for unowned pc_id: {pc_id}")
+            return

        if candidate_data:
            candidate_str = candidate_data.get("candidate", "")
@ -462,7 +582,7 @@ class SignalingManager:
            logger.debug(f"End of ICE candidates for pc_id: {pc_id}")

    async def _handle_renegotiation(
-        self, ws: WebSocket, payload: dict, workflow_id: int, workflow_run_id: int
+        self, ws: WebSocket, payload: dict, connection_key: str
    ):
        """Handle renegotiation request."""
        pc_id = payload.get("pc_id")
@ -475,6 +595,11 @@ class SignalingManager:
                {"type": "error", "payload": {"message": "Peer connection not found"}}
            )
            return
+        if self._peer_connection_owners.get(pc_id) != connection_key:
+            await ws.send_json(
+                {"type": "error", "payload": {"message": "Peer connection not found"}}
+            )
+            return

        pc = self._peer_connections[pc_id]
        await pc.renegotiate(sdp=sdp, type=type_, restart_pc=restart_pc)
@ -545,6 +670,20 @@ async def public_signaling_websocket(
        await websocket.close(code=1008, reason="Invalid embed token")
        return

+    # Enforce the embed token's allowed-domain policy on the public signaling
+    # path, mirroring the HTTP embed endpoints (issue #330). Without this a
+    # leaked or replayed session token could attach from an arbitrary origin.
+    from api.routes.public_embed import validate_origin
+
+    origin = websocket.headers.get("origin") or websocket.headers.get("referer", "")
+    if not validate_origin(origin, embed_token.allowed_domains or []):
+        logger.warning(
+            f"Domain validation failed for public signaling: {origin} "
+            f"not in {embed_token.allowed_domains}"
+        )
+        await websocket.close(code=1008, reason="Domain not allowed")
+        return
+
    # Create a minimal user object for compatibility with signaling manager
    # Use the embed token creator as the user
    user = await db_client.get_user_by_id(embed_token.created_by)
--- a/api/routes/workflow.py
+++ b/api/routes/workflow.py
@ -15,16 +15,30 @@ from api.db import db_client
 from api.db.agent_trigger_client import TriggerPathConflictError
 from api.db.models import UserModel
 from api.db.workflow_template_client import WorkflowTemplateClient
-from api.enums import CallType, PostHogEvent, StorageBackend
+from api.enums import CallType, PostHogEvent, StorageBackend, WorkflowStatus
+from api.schemas.ai_model_configuration import OrganizationAIModelConfigurationV2
 from api.schemas.workflow import WorkflowRunResponseSchema
 from api.sdk_expose import sdk_expose
 from api.services.auth.depends import get_user
+from api.services.configuration.ai_model_configuration import (
+    WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY,
+    check_for_masked_keys_in_ai_model_configuration_v2,
+    compile_ai_model_configuration_v2,
+    convert_legacy_ai_model_configuration_to_v2,
+    get_resolved_ai_model_configuration,
+    merge_ai_model_configuration_v2_secrets,
+)
 from api.services.configuration.check_validity import UserConfigurationValidator
 from api.services.configuration.masking import (
+    mask_workflow_configurations,
    mask_workflow_definition,
    merge_workflow_api_keys,
 )
-from api.services.configuration.resolve import resolve_effective_config
+from api.services.configuration.merge import merge_workflow_configuration_secrets
+from api.services.configuration.resolve import (
+    enrich_overrides_with_api_keys,
+    resolve_effective_config,
+)
 from api.services.mps_service_key_client import mps_service_key_client
 from api.services.posthog_client import capture_event
 from api.services.reports import generate_workflow_report_csv
@ -32,6 +46,10 @@ from api.services.storage import storage_fs
 from api.services.workflow.dto import ReactFlowDTO, sanitize_workflow_definition
 from api.services.workflow.duplicate import duplicate_workflow
 from api.services.workflow.errors import ItemKind, WorkflowError
+from api.services.workflow.run_usage_response import (
+    format_public_cost_info,
+    format_public_usage_info,
+)
 from api.services.workflow.trigger_paths import (
    TriggerPathIssue,
    ensure_trigger_paths,
@ -40,7 +58,15 @@ from api.services.workflow.trigger_paths import (
    trigger_path_to_node_id,
    validate_trigger_paths,
 )
-from api.services.workflow.workflow_graph import WorkflowGraph
+from api.services.workflow.workflow_graph import (
+    WorkflowGraph,
+    validate_node_instance_constraints,
+)
+from api.utils.artifacts import artifact_url
+from api.utils.recording_artifacts import (
+    get_recording_storage_key,
+    has_recording_track,
+)

 router = APIRouter(prefix="/workflow")

@ -169,6 +195,27 @@ def _validation_errors_http_exception(
    )


+def _node_instance_validation_errors(
+    workflow_definition: Optional[dict],
+) -> list[WorkflowError]:
+    """Check spec-driven max_instances limits on a possibly partial definition.
+
+    Gathers the string ``type`` of every dict entry found under ``nodes`` and
+    hands them to ``validate_node_instance_constraints`` with
+    ``enforce_min_instances=False``. A missing or empty definition, or a
+    ``nodes`` value that is not a list, produces no errors.
+    """
+    raw_nodes = workflow_definition.get("nodes") if workflow_definition else None
+    if not isinstance(raw_nodes, list):
+        return []
+
+    declared_types = []
+    for entry in raw_nodes:
+        # Skip malformed entries instead of failing on them.
+        if not isinstance(entry, dict):
+            continue
+        entry_type = entry.get("type")
+        if isinstance(entry_type, str):
+            declared_types.append(entry_type)
+
+    return validate_node_instance_constraints(
+        declared_types,
+        enforce_min_instances=False,
+    )
+
+
 class CallDispositionCodes(BaseModel):
    disposition_codes: list[str] = []

@ -361,6 +408,9 @@ async def create_workflow(
    trigger_path_issues = validate_trigger_paths(workflow_definition)
    if trigger_path_issues:
        raise _trigger_path_validation_http_exception(trigger_path_issues)
+    instance_errors = _node_instance_validation_errors(workflow_definition)
+    if instance_errors:
+        raise _validation_errors_http_exception(instance_errors)

    # Validate trigger path uniqueness BEFORE creating the workflow so we
    # don't leave an orphaned workflow record when the trigger conflicts.
@ -409,7 +459,9 @@ async def create_workflow(
        "current_definition_id": workflow.current_definition_id,
        "template_context_variables": workflow.template_context_variables,
        "call_disposition_codes": workflow.call_disposition_codes,
-        "workflow_configurations": workflow.workflow_configurations,
+        "workflow_configurations": mask_workflow_configurations(
+            workflow.workflow_configurations
+        ),
    }


@ -507,7 +559,9 @@ async def create_workflow_from_template(
            "current_definition_id": workflow.current_definition_id,
            "template_context_variables": workflow.template_context_variables,
            "call_disposition_codes": workflow.call_disposition_codes,
-            "workflow_configurations": workflow.workflow_configurations,
+            "workflow_configurations": mask_workflow_configurations(
+                workflow.workflow_configurations
+            ),
        }

    except HTTPException:
@ -551,6 +605,31 @@ async def get_workflow_count(
    )


+def _validate_status_filter(status: Optional[str]) -> List[str]:
+    """Parse and validate a workflow ``status`` query filter.
+
+    Accepts a single value or a comma-separated list. Returns the list of
+    validated status values (empty when no filter was supplied), with
+    duplicates removed and first-seen order preserved. Callers issue one
+    listing query per returned value and concatenate the results, so a
+    repeated value such as ``active,active`` would otherwise return every
+    matching workflow twice.
+
+    Raises:
+        HTTPException: 422 when any value is outside the ``workflow_status``
+            enum, so the request fails as a clean client error instead of a
+            500 from the Postgres enum cast.
+    """
+    if status is None or status == "":
+        return []
+    allowed = {s.value for s in WorkflowStatus}
+    # dict.fromkeys drops repeats while keeping the order the client supplied.
+    requested = list(dict.fromkeys(s.strip() for s in status.split(",")))
+    invalid = sorted(s for s in requested if s not in allowed)
+    if invalid:
+        # An empty segment (e.g. a trailing comma) is shown as "<empty>" so the
+        # error message stays readable.
+        invalid_display = ["<empty>" if s == "" else s for s in invalid]
+        raise HTTPException(
+            status_code=422,
+            detail=(
+                f"Invalid workflow status filter: {invalid_display}. "
+                f"Allowed values: {sorted(allowed)}."
+            ),
+        )
+    return requested
+
+
@router.get(
    "/fetch",
    **sdk_expose(
@ -570,21 +649,22 @@ async def get_workflows(
    Returns a lightweight response with only essential fields for listing.
    Use GET /workflow/fetch/{workflow_id} to get full workflow details.
    """
-    # Handle comma-separated status values
-    if status and "," in status:
-        # Split comma-separated values and fetch workflows for each status
-        status_list = [s.strip() for s in status.split(",")]
+    statuses = _validate_status_filter(status)
+    if statuses:
+        # Fetch workflows for each requested status and combine the results.
        all_workflows = []
-        for status_value in status_list:
-            workflows = await db_client.get_all_workflows_for_listing(
-                organization_id=user.selected_organization_id, status=status_value
+        for status_value in statuses:
+            all_workflows.extend(
+                await db_client.get_all_workflows_for_listing(
+                    organization_id=user.selected_organization_id,
+                    status=status_value,
+                )
            )
-            all_workflows.extend(workflows)
        workflows = all_workflows
    else:
-        # Single status or no status filter
+        # No status filter
        workflows = await db_client.get_all_workflows_for_listing(
-            organization_id=user.selected_organization_id, status=status
+            organization_id=user.selected_organization_id, status=None
        )

    # Get run counts for all workflows in a single query
@ -652,7 +732,7 @@ async def get_workflow(
        "current_definition_id": workflow.current_definition_id,
        "template_context_variables": template_vars,
        "call_disposition_codes": workflow.call_disposition_codes,
-        "workflow_configurations": workflow_configs,
+        "workflow_configurations": mask_workflow_configurations(workflow_configs),
        "version_number": active_def.version_number if active_def else None,
        "version_status": active_def.status if active_def else None,
        "workflow_uuid": workflow.workflow_uuid,
@ -690,7 +770,9 @@ async def get_workflow_versions(
            created_at=v.created_at,
            published_at=v.published_at,
            workflow_json=mask_workflow_definition(v.workflow_json),
-            workflow_configurations=v.workflow_configurations,
+            workflow_configurations=mask_workflow_configurations(
+                v.workflow_configurations
+            ),
            template_context_variables=v.template_context_variables,
        )
        for v in versions
@ -775,7 +857,9 @@ async def create_workflow_draft(
        created_at=draft.created_at,
        published_at=draft.published_at,
        workflow_json=mask_workflow_definition(draft.workflow_json),
-        workflow_configurations=draft.workflow_configurations,
+        workflow_configurations=mask_workflow_configurations(
+            draft.workflow_configurations
+        ),
        template_context_variables=draft.template_context_variables,
    )

@ -789,10 +873,20 @@ async def get_workflows_summary(
    ),
 ) -> List[WorkflowSummaryResponse]:
    """Get minimal workflow information (id and name only) for all workflows"""
-    workflows = await db_client.get_all_workflows(
-        organization_id=user.selected_organization_id,
-        status=status,
-    )
+    statuses = _validate_status_filter(status)
+    if statuses:
+        workflows = []
+        for status_value in statuses:
+            workflows.extend(
+                await db_client.get_all_workflows(
+                    organization_id=user.selected_organization_id,
+                    status=status_value,
+                )
+            )
+    else:
+        workflows = await db_client.get_all_workflows(
+            organization_id=user.selected_organization_id, status=None
+        )
    return [
        WorkflowSummaryResponse(id=workflow.id, name=workflow.name)
        for workflow in workflows
@ -833,7 +927,9 @@ async def update_workflow_status(
            "current_definition_id": workflow.current_definition_id,
            "template_context_variables": workflow.template_context_variables,
            "call_disposition_codes": workflow.call_disposition_codes,
-            "workflow_configurations": workflow.workflow_configurations,
+            "workflow_configurations": mask_workflow_configurations(
+                workflow.workflow_configurations
+            ),
            "total_runs": run_count,
        }
    except ValueError as e:
@ -921,6 +1017,9 @@ async def update_workflow(
        trigger_path_issues = validate_trigger_paths(workflow_definition)
        if trigger_path_issues:
            raise _trigger_path_validation_http_exception(trigger_path_issues)
+        instance_errors = _node_instance_validation_errors(workflow_definition)
+        if instance_errors:
+            raise _validation_errors_http_exception(instance_errors, status_code=409)
        if workflow_definition:
            existing_workflow = await db_client.get_workflow(
                workflow_id, organization_id=user.selected_organization_id
@ -938,24 +1037,133 @@ async def update_workflow(
                    existing_def,
                )

-        # Validate model_overrides: resolve onto global config, then
-        # run the same validator used by the user-configurations endpoint.
-        if request.workflow_configurations and request.workflow_configurations.get(
-            "model_overrides"
+        # Validate model overrides. v2 uses a complete workflow-level model
+        # configuration; legacy v1 uses partial service overlays.
+        workflow_configurations = request.workflow_configurations
+        if workflow_configurations and workflow_configurations.get(
+            WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY
        ):
-            user_config = await db_client.get_user_configurations(user.id)
-            try:
-                effective = resolve_effective_config(
-                    user_config,
-                    request.workflow_configurations["model_overrides"],
+            existing_workflow = await db_client.get_workflow(
+                workflow_id, organization_id=user.selected_organization_id
+            )
+            if existing_workflow is None:
+                raise HTTPException(
+                    status_code=404, detail=f"Workflow with id {workflow_id} not found"
                )
+            existing_draft = await db_client.get_draft_version(workflow_id)
+            existing_configs = (
+                existing_draft.workflow_configurations
+                if existing_draft
+                else existing_workflow.released_definition.workflow_configurations
+            )
+            existing_v2_override = (existing_configs or {}).get(
+                WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY
+            )
+            try:
+                incoming_v2_override = (
+                    OrganizationAIModelConfigurationV2.model_validate(
+                        workflow_configurations[
+                            WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY
+                        ]
+                    )
+                )
+                existing_v2_override_config = (
+                    OrganizationAIModelConfigurationV2.model_validate(
+                        existing_v2_override
+                    )
+                    if existing_v2_override
+                    else None
+                )
+                v2_override = merge_ai_model_configuration_v2_secrets(
+                    incoming_v2_override,
+                    existing_v2_override_config,
+                )
+                if existing_v2_override_config is None:
+                    resolved_config = await get_resolved_ai_model_configuration(
+                        user_id=user.id,
+                        organization_id=user.selected_organization_id,
+                    )
+                    v2_override = merge_ai_model_configuration_v2_secrets(
+                        v2_override,
+                        resolved_config.organization_configuration,
+                    )
+                check_for_masked_keys_in_ai_model_configuration_v2(v2_override)
+                effective = compile_ai_model_configuration_v2(v2_override)
                await UserConfigurationValidator().validate(
                    effective,
                    organization_id=user.selected_organization_id,
                    created_by=user.provider_id,
                )
+            except (ValidationError, ValueError) as e:
+                raise HTTPException(status_code=422, detail=str(e))
+            workflow_configurations = {
+                **workflow_configurations,
+                WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY: v2_override.model_dump(
+                    mode="json",
+                    exclude_none=True,
+                ),
+            }
+            workflow_configurations.pop("model_overrides", None)
+        elif workflow_configurations and workflow_configurations.get("model_overrides"):
+            existing_workflow = await db_client.get_workflow(
+                workflow_id, organization_id=user.selected_organization_id
+            )
+            if existing_workflow is None:
+                raise HTTPException(
+                    status_code=404, detail=f"Workflow with id {workflow_id} not found"
+                )
+            existing_draft = await db_client.get_draft_version(workflow_id)
+            existing_configs = (
+                existing_draft.workflow_configurations
+                if existing_draft
+                else existing_workflow.released_definition.workflow_configurations
+            )
+            workflow_configurations = merge_workflow_configuration_secrets(
+                workflow_configurations,
+                existing_configs,
+            )
+            resolved_config = await get_resolved_ai_model_configuration(
+                user_id=user.id,
+                organization_id=user.selected_organization_id,
+            )
+            effective_config = resolved_config.effective
+            try:
+                enriched_overrides = enrich_overrides_with_api_keys(
+                    workflow_configurations["model_overrides"],
+                    effective_config,
+                )
+                effective = resolve_effective_config(
+                    effective_config, enriched_overrides
+                )
+                if resolved_config.source == "organization_v2":
+                    v2_override = convert_legacy_ai_model_configuration_to_v2(effective)
+                    await UserConfigurationValidator().validate(
+                        compile_ai_model_configuration_v2(v2_override),
+                        organization_id=user.selected_organization_id,
+                        created_by=user.provider_id,
+                    )
+                else:
+                    await UserConfigurationValidator().validate(
+                        effective,
+                        organization_id=user.selected_organization_id,
+                        created_by=user.provider_id,
+                    )
            except ValueError as e:
                raise HTTPException(status_code=422, detail=str(e))
+            if resolved_config.source == "organization_v2":
+                workflow_configurations = {
+                    **workflow_configurations,
+                    WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY: v2_override.model_dump(
+                        mode="json",
+                        exclude_none=True,
+                    ),
+                }
+                workflow_configurations.pop("model_overrides", None)
+            else:
+                workflow_configurations = {
+                    **workflow_configurations,
+                    "model_overrides": enriched_overrides,
+                }

        # Reject upfront if any new trigger path collides with another
        # workflow's trigger — keeps the workflow record from
@ -978,7 +1186,7 @@ async def update_workflow(
            name=request.name,
            workflow_definition=workflow_definition,
            template_context_variables=request.template_context_variables,
-            workflow_configurations=request.workflow_configurations,
+            workflow_configurations=workflow_configurations,
            organization_id=user.selected_organization_id,
        )

@ -1014,7 +1222,7 @@ async def update_workflow(
            "current_definition_id": workflow.current_definition_id,
            "template_context_variables": template_vars,
            "call_disposition_codes": workflow.call_disposition_codes,
-            "workflow_configurations": workflow_configs,
+            "workflow_configurations": mask_workflow_configurations(workflow_configs),
            "version_number": active_def.version_number if active_def else None,
            "version_status": active_def.status if active_def else None,
        }
@ -1061,7 +1269,9 @@ async def duplicate_workflow_endpoint(
            "current_definition_id": workflow.current_definition_id,
            "template_context_variables": workflow.template_context_variables,
            "call_disposition_codes": workflow.call_disposition_codes,
-            "workflow_configurations": workflow.workflow_configurations,
+            "workflow_configurations": mask_workflow_configurations(
+                workflow.workflow_configurations
+            ),
        }
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
@ -1113,6 +1323,20 @@ async def get_workflow_run(
    )
    if not run:
        raise HTTPException(status_code=404, detail="Workflow run not found")
+
+    public_access_token = run.public_access_token
+    user_recording_url = get_recording_storage_key(run.extra, "user")
+    bot_recording_url = get_recording_storage_key(run.extra, "bot")
+    has_user_recording = has_recording_track(run.extra, "user")
+    has_bot_recording = has_recording_track(run.extra, "bot")
+    if (
+        run.transcript_url
+        or run.recording_url
+        or has_user_recording
+        or has_bot_recording
+    ) and not public_access_token:
+        public_access_token = await db_client.ensure_public_access_token(run.id)
+
    return {
        "id": run.id,
        "workflow_id": run.workflow_id,
@ -1121,22 +1345,23 @@ async def get_workflow_run(
        "is_completed": run.is_completed,
        "transcript_url": run.transcript_url,
        "recording_url": run.recording_url,
-        "cost_info": {
-            "dograh_token_usage": (
-                run.cost_info.get("dograh_token_usage")
-                if run.cost_info and "dograh_token_usage" in run.cost_info
-                else round(float(run.cost_info.get("total_cost_usd", 0)) * 100, 2)
-                if run.cost_info and "total_cost_usd" in run.cost_info
-                else 0
-            ),
-            "call_duration_seconds": int(
-                round(run.cost_info.get("call_duration_seconds"))
-            )
-            if run.cost_info and run.cost_info.get("call_duration_seconds") is not None
-            else None,
-        }
-        if run.cost_info
-        else None,
+        "user_recording_url": user_recording_url,
+        "bot_recording_url": bot_recording_url,
+        "transcript_public_url": artifact_url(public_access_token, "transcript"),
+        "recording_public_url": artifact_url(public_access_token, "recording"),
+        "user_recording_public_url": (
+            artifact_url(public_access_token, "user_recording")
+            if has_user_recording
+            else None
+        ),
+        "bot_recording_public_url": (
+            artifact_url(public_access_token, "bot_recording")
+            if has_bot_recording
+            else None
+        ),
+        "public_access_token": public_access_token,
+        "cost_info": format_public_cost_info(run.cost_info, run.usage_info),
+        "usage_info": format_public_usage_info(run.usage_info),
        "created_at": run.created_at,
        "definition_id": run.definition_id,
        "initial_context": run.initial_context,
@ -1336,7 +1561,9 @@ async def duplicate_workflow_template(
        "current_definition_id": workflow.current_definition_id,
        "template_context_variables": workflow.template_context_variables,
        "call_disposition_codes": workflow.call_disposition_codes,
-        "workflow_configurations": workflow.workflow_configurations,
+        "workflow_configurations": mask_workflow_configurations(
+            workflow.workflow_configurations
+        ),
    }


--- a/api/routes/workflow_text_chat.py
+++ b/api/routes/workflow_text_chat.py
@ -9,8 +9,8 @@ from pydantic import BaseModel, Field
 from api.db import db_client
 from api.db.models import UserModel, WorkflowRunTextSessionModel
 from api.enums import WorkflowRunMode
-from api.services.auth.depends import get_user
-from api.services.quota_service import check_dograh_quota
+from api.services.auth.depends import get_user_with_selected_organization
+from api.services.quota_service import authorize_workflow_run_start
 from api.services.workflow.text_chat_session_service import (
    TextChatPendingTurnLostError,
    TextChatSessionExecutionError,
@ -96,14 +96,16 @@ def _revision_conflict_detail(e: Any) -> dict[str, Any]:
    }


-def _require_selected_organization_id(user: UserModel) -> int:
-    if user.selected_organization_id is None:
-        raise HTTPException(status_code=403, detail="Organization context is required")
-    return user.selected_organization_id
-
-
-async def _ensure_text_chat_quota(user: UserModel, workflow_id: int) -> None:
-    quota_result = await check_dograh_quota(user, workflow_id=workflow_id)
+async def _ensure_text_chat_quota(
+    user: UserModel,
+    workflow_id: int,
+    workflow_run_id: int,
+) -> None:
+    """Gate a text-chat run on ``authorize_workflow_run_start``.
+
+    Passes the workflow id, the run id and the acting user to the
+    authorization helper.
+
+    Raises:
+        HTTPException: 402 carrying the result's ``error_message`` when the
+            returned result reports ``has_quota`` as falsy.
+    """
+    quota_result = await authorize_workflow_run_start(
+        workflow_id=workflow_id,
+        workflow_run_id=workflow_run_id,
+        actor_user=user,
+    )
    if not quota_result.has_quota:
        raise HTTPException(status_code=402, detail=quota_result.error_message)

@ -114,9 +116,8 @@ async def _load_text_session_or_404(
    user: UserModel,
 ) -> WorkflowRunTextSessionModel:
    set_current_run_id(run_id)
-    organization_id = _require_selected_organization_id(user)
    text_session = await db_client.get_workflow_run_text_session(
-        run_id, organization_id=organization_id
+        run_id, organization_id=user.selected_organization_id
    )
    if not text_session or not text_session.workflow_run:
        raise HTTPException(status_code=404, detail="Text chat session not found")
@ -158,11 +159,8 @@ async def _execute_pending_turn_response(
 async def create_text_chat_session(
    workflow_id: int,
    request: CreateTextChatSessionRequest,
-    user: UserModel = Depends(get_user),
+    user: UserModel = Depends(get_user_with_selected_organization),
 ) -> WorkflowRunTextSessionResponse:
-    organization_id = _require_selected_organization_id(user)
-    await _ensure_text_chat_quota(user, workflow_id)
-
    session_name = request.name or f"WR-TEXT-{uuid4().hex[:6].upper()}"
    try:
        workflow_run = await db_client.create_workflow_run(
@ -172,12 +170,13 @@ async def create_text_chat_session(
            user_id=user.id,
            initial_context=request.initial_context,
            use_draft=True,
-            organization_id=organization_id,
+            organization_id=user.selected_organization_id,
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    set_current_run_id(workflow_run.id)
+    await _ensure_text_chat_quota(user, workflow_id, workflow_run.id)

    annotations = {
        "tester": {
@ -220,7 +219,7 @@ async def create_text_chat_session(
 async def get_text_chat_session(
    workflow_id: int,
    run_id: int,
-    user: UserModel = Depends(get_user),
+    user: UserModel = Depends(get_user_with_selected_organization),
 ) -> WorkflowRunTextSessionResponse:
    text_session = await _load_text_session_or_404(workflow_id, run_id, user)
    return _build_response(text_session)
@ -234,10 +233,10 @@ async def append_text_chat_message(
    workflow_id: int,
    run_id: int,
    request: AppendTextChatMessageRequest,
-    user: UserModel = Depends(get_user),
+    user: UserModel = Depends(get_user_with_selected_organization),
 ) -> WorkflowRunTextSessionResponse:
    text_session = await _load_text_session_or_404(workflow_id, run_id, user)
-    await _ensure_text_chat_quota(user, workflow_id)
+    await _ensure_text_chat_quota(user, workflow_id, run_id)

    try:
        text_session = await append_text_chat_user_message(
@ -264,7 +263,7 @@ async def rewind_text_chat_session(
    workflow_id: int,
    run_id: int,
    request: RewindTextChatSessionRequest,
-    user: UserModel = Depends(get_user),
+    user: UserModel = Depends(get_user_with_selected_organization),
 ) -> WorkflowRunTextSessionResponse:
    text_session = await _load_text_session_or_404(workflow_id, run_id, user)
    try:
--- a/api/schemas/ai_model_configuration.py
+++ b/api/schemas/ai_model_configuration.py
@ -0,0 +1,190 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Literal
+
+from pydantic import BaseModel, Field, model_validator
+
+from api.services.configuration.registry import (
+    DograhEmbeddingsConfiguration,
+    DograhLLMService,
+    DograhSTTService,
+    DograhTTSService,
+    EmbeddingsConfig,
+    LLMConfig,
+    RealtimeConfig,
+    ServiceProviders,
+    STTConfig,
+    TTSConfig,
+)
+
+# Inclusive bounds enforced on DograhManagedAIModelConfiguration.speed.
+DOGRAH_SPEED_MIN = 0.5
+DOGRAH_SPEED_MAX = 2.0
+# NOTE(review): STEP and OPTIONS are not referenced anywhere in this module;
+# presumably they are consumed by API/UI callers — confirm before changing.
+DOGRAH_SPEED_STEP = 0.1
+DOGRAH_SPEED_OPTIONS: tuple[float, ...] = (0.8, 1.0, 1.2)
+# Defaults used when a Dograh-managed configuration omits voice / language.
+DOGRAH_DEFAULT_VOICE = "default"
+DOGRAH_DEFAULT_LANGUAGE = "multi"
+
+
+class EffectiveAIModelConfiguration(BaseModel):
+    # Flattened, per-service view of the AI model settings. Within this module
+    # it is produced by compile_ai_model_configuration_v2. Every section is
+    # optional, so an empty instance is valid.
+    llm: LLMConfig | None = None
+    stt: STTConfig | None = None
+    tts: TTSConfig | None = None
+    embeddings: EmbeddingsConfig | None = None
+    realtime: RealtimeConfig | None = None
+    is_realtime: bool = False
+    managed_service_version: int | None = None
+    test_phone_number: str | None = None
+    timezone: str | None = None
+    last_validated_at: datetime | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def strip_incomplete_realtime_when_disabled(cls, data):
+        """Skip realtime validation when is_realtime is False and api_key is missing.
+
+        Only dict input is inspected; any other input is passed through as-is.
+        When the realtime section has to be dropped, a filtered shallow copy is
+        returned instead of popping the key, so the dict handed in by the
+        caller is never mutated as a side effect of validation.
+        """
+        if isinstance(data, dict) and not data.get("is_realtime", False):
+            realtime = data.get("realtime")
+            if isinstance(realtime, dict) and not realtime.get("api_key"):
+                return {
+                    key: value for key, value in data.items() if key != "realtime"
+                }
+        return data
+
+
+class DograhManagedAIModelConfiguration(BaseModel):
+    # Settings for the Dograh-managed mode. The single api_key is reused for
+    # the LLM, TTS, STT and embeddings services by _compile_dograh_configuration.
+    api_key: str
+    voice: str = DOGRAH_DEFAULT_VOICE
+    # Validated against the inclusive [DOGRAH_SPEED_MIN, DOGRAH_SPEED_MAX] range.
+    speed: float = Field(default=1.0, ge=DOGRAH_SPEED_MIN, le=DOGRAH_SPEED_MAX)
+    language: str = DOGRAH_DEFAULT_LANGUAGE
+
+
+class BYOKPipelineAIModelConfiguration(BaseModel):
+    # Bring-your-own-key pipeline: llm, tts and stt are mandatory, embeddings
+    # is optional.
+    llm: LLMConfig
+    tts: TTSConfig
+    stt: STTConfig
+    embeddings: EmbeddingsConfig | None = None
+
+    @model_validator(mode="after")
+    def reject_dograh_providers(self):
+        """Fail validation if any configured section uses the Dograh provider."""
+        # Checked in a fixed order so the first offending section is reported.
+        for section in ("llm", "tts", "stt", "embeddings"):
+            _reject_dograh_provider(section, getattr(self, section))
+        return self
+
+
+class BYOKRealtimeAIModelConfiguration(BaseModel):
+    # Bring-your-own-key realtime setup: a realtime service plus an LLM, with
+    # optional embeddings.
+    realtime: RealtimeConfig
+    llm: LLMConfig
+    embeddings: EmbeddingsConfig | None = None
+
+    @model_validator(mode="after")
+    def reject_dograh_providers(self):
+        """Fail validation if llm or embeddings uses the Dograh provider.
+
+        The realtime section itself is not checked here.
+        """
+        checked_sections = (("llm", self.llm), ("embeddings", self.embeddings))
+        for section, service in checked_sections:
+            _reject_dograh_provider(section, service)
+        return self
+
+
+class BYOKAIModelConfiguration(BaseModel):
+    # `mode` selects which of the two optional sections must be present.
+    mode: Literal["pipeline", "realtime"]
+    pipeline: BYOKPipelineAIModelConfiguration | None = None
+    realtime: BYOKRealtimeAIModelConfiguration | None = None
+
+    @model_validator(mode="after")
+    def validate_selected_mode(self):
+        """Require the section named by ``mode``; the other one may be absent."""
+        pipeline_missing = self.mode == "pipeline" and self.pipeline is None
+        realtime_missing = self.mode == "realtime" and self.realtime is None
+        if pipeline_missing:
+            raise ValueError("byok.pipeline is required when byok.mode is pipeline")
+        if realtime_missing:
+            raise ValueError("byok.realtime is required when byok.mode is realtime")
+        return self
+
+
+class OrganizationAIModelConfigurationV2(BaseModel):
+    # Version-2 configuration document: `mode` selects which of the two
+    # optional sections must be present.
+    version: Literal[2] = 2
+    mode: Literal["dograh", "byok"]
+    dograh: DograhManagedAIModelConfiguration | None = None
+    byok: BYOKAIModelConfiguration | None = None
+
+    @model_validator(mode="after")
+    def validate_selected_mode(self):
+        """Require the section named by ``mode``; the other one may be absent."""
+        dograh_missing = self.mode == "dograh" and self.dograh is None
+        byok_missing = self.mode == "byok" and self.byok is None
+        if dograh_missing:
+            raise ValueError("dograh configuration is required when mode is dograh")
+        if byok_missing:
+            raise ValueError("byok configuration is required when mode is byok")
+        return self
+
+
+class OrganizationAIModelConfigurationResponse(BaseModel):
+    # Both payloads are untyped dicts; this schema does not validate their shape.
+    # NOTE(review): presumably `configuration` is the stored v2 document and
+    # `effective_configuration` the compiled EffectiveAIModelConfiguration dump
+    # — confirm against the route that builds this response.
+    configuration: dict | None
+    effective_configuration: dict
+    # Origin of the configuration; restricted to these three literals.
+    source: Literal["organization_v2", "legacy_user_v1", "empty"]
+
+
+def compile_ai_model_configuration_v2(
+    configuration: OrganizationAIModelConfigurationV2,
+) -> EffectiveAIModelConfiguration:
+    """Flatten a v2 configuration into an EffectiveAIModelConfiguration.
+
+    The "dograh" mode is delegated to _compile_dograh_configuration. BYOK
+    pipeline mode copies llm/tts/stt/embeddings with is_realtime=False; BYOK
+    realtime mode copies llm/realtime/embeddings with is_realtime=True.
+
+    Raises:
+        ValueError: if the section selected by the mode is missing.
+    """
+    if configuration.mode == "dograh":
+        managed = configuration.dograh
+        if managed is None:
+            raise ValueError("dograh configuration is required")
+        return _compile_dograh_configuration(managed)
+
+    byok = configuration.byok
+    if byok is None:
+        raise ValueError("byok configuration is required")
+
+    if byok.mode != "pipeline":
+        # Any non-pipeline BYOK mode is handled as realtime.
+        realtime_section = byok.realtime
+        if realtime_section is None:
+            raise ValueError("byok.realtime is required")
+        return EffectiveAIModelConfiguration(
+            llm=realtime_section.llm,
+            realtime=realtime_section.realtime,
+            embeddings=realtime_section.embeddings,
+            is_realtime=True,
+        )
+
+    pipeline_section = byok.pipeline
+    if pipeline_section is None:
+        raise ValueError("byok.pipeline is required")
+    return EffectiveAIModelConfiguration(
+        llm=pipeline_section.llm,
+        tts=pipeline_section.tts,
+        stt=pipeline_section.stt,
+        embeddings=pipeline_section.embeddings,
+        is_realtime=False,
+    )
+
+
+def _compile_dograh_configuration(
+    configuration: DograhManagedAIModelConfiguration,
+) -> EffectiveAIModelConfiguration:
+    """Expand a Dograh-managed configuration into per-service settings.
+
+    The single api_key is passed to all four services; voice and speed go to
+    TTS and language goes to STT. The result is never realtime and is tagged
+    with managed_service_version=2.
+    """
+    api_key = configuration.api_key
+    provider = ServiceProviders.DOGRAH
+
+    llm_service = DograhLLMService(
+        provider=provider,
+        api_key=api_key,
+        model="default",
+    )
+    tts_service = DograhTTSService(
+        provider=provider,
+        api_key=api_key,
+        model="default",
+        voice=configuration.voice,
+        speed=configuration.speed,
+    )
+    stt_service = DograhSTTService(
+        provider=provider,
+        api_key=api_key,
+        model="default",
+        language=configuration.language,
+    )
+    embeddings_service = DograhEmbeddingsConfiguration(
+        provider=provider,
+        api_key=api_key,
+        model="dograh_embedding_v1",
+    )
+
+    return EffectiveAIModelConfiguration(
+        llm=llm_service,
+        tts=tts_service,
+        stt=stt_service,
+        embeddings=embeddings_service,
+        is_realtime=False,
+        managed_service_version=2,
+    )
+
+
+def _reject_dograh_provider(section: str, service) -> None:
+    """Raise ValueError when ``service`` is configured with the Dograh provider.
+
+    ``None`` services and objects without a ``provider`` attribute pass.
+    ``section`` is only used to name the offending section in the message.
+    """
+    uses_dograh = (
+        service is not None
+        and getattr(service, "provider", None) == ServiceProviders.DOGRAH
+    )
+    if uses_dograh:
+        raise ValueError(f"BYOK {section} cannot use Dograh provider")
--- a/api/schemas/onboarding_state.py
+++ b/api/schemas/onboarding_state.py
@ -0,0 +1,47 @@
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class OnboardingState(BaseModel):
+    """Per-user onboarding state, stored under UserConfigurationKey.ONBOARDING.
+
+    Server-authoritative replacement for the browser-localStorage onboarding
+    store, so the post-signup gate and one-time tooltips hold across devices.
+    """
+
+    # Post-signup onboarding form gate: set once on submit/skip.
+    completed_at: datetime | None = None
+    skipped: bool = False
+    # One-time UI affordances (tooltip keys, milestone action keys). Kept as
+    # free-form strings — the UI owns the vocabulary.
+    # default_factory=list gives every instance its own empty list.
+    seen_tooltips: list[str] = Field(default_factory=list)
+    completed_actions: list[str] = Field(default_factory=list)
+
+
+class OnboardingStateUpdate(BaseModel):
+    """Partial update merged into the stored state.
+
+    Scalars overwrite when supplied; list entries are unioned into the stored
+    lists, so concurrent updates (e.g. two tabs marking different tooltips)
+    don't drop each other's items.
+    """
+
+    completed_at: datetime | None = None
+    skipped: bool | None = None
+    seen_tooltips: list[str] | None = None
+    completed_actions: list[str] | None = None
+
+    def apply_to(self, state: OnboardingState) -> OnboardingState:
+        """Return a deep copy of ``state`` with this update merged in.
+
+        ``state`` itself is left untouched. A scalar left at None means "not
+        supplied" and keeps the stored value; list items are appended in the
+        order given, skipping any already present.
+        """
+        result = state.model_copy(deep=True)
+
+        for scalar_field in ("completed_at", "skipped"):
+            supplied = getattr(self, scalar_field)
+            if supplied is not None:
+                setattr(result, scalar_field, supplied)
+
+        for list_field in ("seen_tooltips", "completed_actions"):
+            stored_items = getattr(result, list_field)
+            for item in getattr(self, list_field) or []:
+                if item not in stored_items:
+                    stored_items.append(item)
+
+        return result
--- a/api/schemas/organization_preferences.py
+++ b/api/schemas/organization_preferences.py
@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class OrganizationPreferences(BaseModel):
+    # Both preferences are optional free-form strings; this schema applies no
+    # format validation (e.g. phone-number shape or timezone name).
+    test_phone_number: str | None = None
+    timezone: str | None = None
--- a/api/schemas/tool.py
+++ b/api/schemas/tool.py
@ -0,0 +1,447 @@
+"""Pydantic schemas for reusable Dograh tools.
+
+These models are the single contract for tool creation/update across the
+REST API, generated SDKs, and the MCP authoring surface. Field descriptions
+are human/API-facing; ``llm_hint`` JSON schema extras are guidance for LLMs
+when the same schema is surfaced through MCP or SDK authoring flows.
+"""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+
+from api.enums import ToolCategory
+
+# Defaults for McpToolConfig.timeout_secs / sse_read_timeout_secs (seconds).
+DEFAULT_MCP_TIMEOUT_SECS = 30
+DEFAULT_MCP_SSE_READ_TIMEOUT_SECS = 300
+
+# JSON value types a tool parameter may declare.
+ToolParameterType = Literal["string", "number", "boolean", "object", "array"]
+# Upper-case HTTP verbs accepted by HttpApiConfig.method.
+HttpMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE"]
+# Category strings accepted by CreateToolRequest.category.
+# NOTE(review): "native" and "integration" have no matching member in the
+# ToolDefinition union, and CreateToolRequest requires category to equal
+# definition.type, so those two values cannot currently pass creation —
+# confirm this is intended.
+ToolCategoryValue = Literal[
+    "http_api",
+    "end_call",
+    "transfer_call",
+    "calculator",
+    "native",
+    "integration",
+    "mcp",
+]
+
+
+def _llm_hint(text: str) -> dict[str, str]:
+    """Wrap ``text`` as the ``llm_hint`` entry passed to ``json_schema_extra``."""
+    return dict(llm_hint=text)
+
+
+class ToolParameter(BaseModel):
+    """A parameter that the tool accepts from the model at call time."""
+
+    # Each field carries a human/API-facing `description`; where present, the
+    # `llm_hint` schema extra is authoring guidance (see the module docstring).
+    name: str = Field(
+        description="Parameter name used as a key in the tool request body.",
+        json_schema_extra=_llm_hint(
+            "Use a stable snake_case name the agent can naturally fill."
+        ),
+    )
+    type: ToolParameterType = Field(
+        description="JSON type for the parameter value.",
+        json_schema_extra=_llm_hint(
+            "Allowed values are string, number, boolean, object, and array."
+        ),
+    )
+    description: str = Field(
+        description="Description shown to the model for this parameter.",
+        json_schema_extra=_llm_hint(
+            "Write this as an instruction to the agent: what value to provide and when."
+        ),
+    )
+    # Parameters are required unless the author explicitly opts out.
+    required: bool = Field(
+        default=True,
+        description="Whether this parameter is required when the tool is called.",
+    )
+
+
+class PresetToolParameter(BaseModel):
+    """A parameter injected by Dograh at runtime."""
+
+    name: str = Field(description="Parameter name used as a key in the request body.")
+    type: ToolParameterType = Field(
+        description="JSON type for the resolved value.",
+        json_schema_extra=_llm_hint(
+            "Allowed values are string, number, boolean, object, and array."
+        ),
+    )
+    # Stored as a plain string; this schema does not parse or validate the
+    # template syntax.
+    value_template: str = Field(
+        description="Fixed value or template, e.g. {{initial_context.phone_number}}.",
+        json_schema_extra=_llm_hint(
+            "Use {{initial_context.*}} for call-start context and "
+            "{{gathered_context.*}} for values extracted during the call."
+        ),
+    )
+    required: bool = Field(
+        default=True,
+        description="Whether the parameter must resolve to a non-empty value.",
+    )
+
+
+class HttpApiConfig(BaseModel):
+    """Configuration for HTTP API tools."""
+
+    method: HttpMethod = Field(
+        description="HTTP method to use for the request.",
+        json_schema_extra=_llm_hint("Use one of GET, POST, PUT, PATCH, DELETE."),
+    )
+    url: str = Field(
+        description="Target HTTP or HTTPS URL.",
+        json_schema_extra=_llm_hint(
+            "Use the final endpoint URL. Authentication belongs in credential_uuid, "
+            "not embedded in the URL."
+        ),
+    )
+    headers: Optional[Dict[str, str]] = Field(
+        default=None,
+        description="Static headers to include with every request.",
+        json_schema_extra=_llm_hint(
+            "Do not place secrets here. Store secrets in the UI credential manager "
+            "and reference them with credential_uuid."
+        ),
+    )
+    credential_uuid: Optional[str] = Field(
+        default=None,
+        description="Reference to an external credential for request authentication.",
+        json_schema_extra=_llm_hint(
+            "Use a credential_uuid returned by list_credentials. The MCP flow does "
+            "not create credential secrets."
+        ),
+    )
+    parameters: Optional[List[ToolParameter]] = Field(
+        default=None,
+        description="Parameters the model must provide when calling this tool.",
+    )
+    preset_parameters: Optional[List[PresetToolParameter]] = Field(
+        default=None,
+        description=(
+            "Parameters injected by Dograh from fixed values or workflow context "
+            "templates."
+        ),
+    )
+    timeout_ms: Optional[int] = Field(
+        default=5000,
+        ge=1,
+        description="Request timeout in milliseconds.",
+    )
+    customMessage: Optional[str] = Field(
+        default=None, description="Custom message to play after tool execution."
+    )
+    customMessageType: Optional[Literal["text", "audio"]] = Field(
+        default=None, description="Type of custom message."
+    )
+    customMessageRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for an audio custom message."
+    )
+
+    @field_validator("method", mode="before")
+    @classmethod
+    def validate_method(cls, v: Any) -> str:
+        """Upper-case the incoming method and reject anything unsupported.
+
+        Runs in "before" mode, so lower- or mixed-case input such as "post" is
+        normalized before the Literal check on the field.
+
+        Raises:
+            ValueError: if ``v`` is not a string or is not a supported verb.
+        """
+        supported = {"GET", "POST", "PUT", "PATCH", "DELETE"}
+        # Non-string input maps to None, which is never a supported verb.
+        normalized = v.upper() if isinstance(v, str) else None
+        if normalized not in supported:
+            raise ValueError("method must be one of GET, POST, PUT, PATCH, DELETE")
+        return normalized
+
+
+class EndCallConfig(BaseModel):
+    """Configuration for End Call tools."""
+
+    # No cross-field validation is applied here: for example,
+    # messageType="custom" without a customMessage is accepted by this schema.
+    messageType: Literal["none", "custom", "audio"] = Field(
+        default="none", description="Type of goodbye message."
+    )
+    customMessage: Optional[str] = Field(
+        default=None, description="Custom message to play before ending the call."
+    )
+    audioRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for audio goodbye message."
+    )
+    endCallReason: bool = Field(
+        default=False,
+        description=(
+            "When enabled, the model must provide a reason for ending the call. "
+            "The reason is set as call disposition and added to call tags."
+        ),
+    )
+    endCallReasonDescription: Optional[str] = Field(
+        default=None,
+        description=(
+            "Description shown to the model for the reason parameter. Used only "
+            "when endCallReason is enabled."
+        ),
+    )
+
+
+class TransferCallConfig(BaseModel):
+    """Configuration for Transfer Call tools."""
+
+    destination: str = Field(
+        description=(
+            "Phone number or SIP endpoint to transfer the call to, e.g. "
+            "+1234567890 or PJSIP/1234."
+        )
+    )
+    messageType: Literal["none", "custom", "audio"] = Field(
+        default="none", description="Type of message to play before transfer."
+    )
+    customMessage: Optional[str] = Field(
+        default=None, description="Custom message to play before transferring."
+    )
+    audioRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for audio message before transfer."
+    )
+    timeout: int = Field(
+        default=30,
+        ge=5,
+        le=120,
+        description="Maximum seconds to wait for the destination to answer.",
+    )
+
+    @field_validator("destination")
+    @classmethod
+    def validate_destination(cls, v: str) -> str:
+        """Validate that destination is a valid E.164 phone number or SIP endpoint.
+
+        Empty or whitespace-only values are returned unchanged without any
+        format check. Any other value must match one of the two formats over
+        its entire length.
+
+        Raises:
+            ValueError: if the value is neither an E.164 number nor a
+                PJSIP/SIP endpoint.
+        """
+        if not v.strip():
+            return v
+
+        e164_pattern = r"^\+[1-9]\d{1,14}$"
+        sip_pattern = r"^(PJSIP|SIP)/[\w\-\.@]+$"
+
+        # fullmatch rather than match: "$" also matches just before a trailing
+        # newline, so match() let values such as "+1234567890\n" through.
+        # re.ASCII limits \d to 0-9 so non-ASCII digits are not accepted as E.164.
+        is_valid_e164 = re.fullmatch(e164_pattern, v, re.ASCII)
+        is_valid_sip = re.fullmatch(sip_pattern, v, re.IGNORECASE)
+
+        if not (is_valid_e164 or is_valid_sip):
+            raise ValueError(
+                "Destination must be a valid E.164 phone number "
+                "(e.g., +1234567890) or SIP endpoint (e.g., PJSIP/1234)"
+            )
+        return v
+
+
+class McpToolConfig(BaseModel):
+    """Configuration for a customer MCP server tool definition."""
+
+    transport: Literal["streamable_http"] = Field(
+        default="streamable_http",
+        description="MCP transport protocol.",
+    )
+    url: str = Field(
+        description="MCP server URL. Must use http:// or https://.",
+        json_schema_extra=_llm_hint("Use the server's streamable HTTP MCP endpoint."),
+    )
+    credential_uuid: Optional[str] = Field(
+        default=None,
+        description="Reference to an external credential for MCP server auth.",
+        json_schema_extra=_llm_hint(
+            "Use a credential_uuid returned by list_credentials. Credentials are "
+            "created by the user in the UI."
+        ),
+    )
+    tools_filter: list[str] = Field(
+        default_factory=list,
+        description="Allowlist of MCP tool names to expose. Empty exposes all tools.",
+        json_schema_extra=_llm_hint(
+            "Use exact MCP tool names from the remote server catalog when you need "
+            "to restrict the exposed tools."
+        ),
+    )
+    timeout_secs: int = Field(
+        default=DEFAULT_MCP_TIMEOUT_SECS,
+        ge=0,
+        description="Connection timeout in seconds.",
+    )
+    sse_read_timeout_secs: int = Field(
+        default=DEFAULT_MCP_SSE_READ_TIMEOUT_SECS,
+        ge=0,
+        description="SSE read timeout in seconds.",
+    )
+    discovered_tools: list[dict[str, Any]] = Field(
+        default_factory=list,
+        description=(
+            "Server-managed cache of the MCP server's tool catalog "
+            "[{name, description}]. Populated best-effort by the backend."
+        ),
+        json_schema_extra=_llm_hint("Do not author this field; the server fills it."),
+    )
+
+    @field_validator("url")
+    @classmethod
+    def validate_url(cls, v: str) -> str:
+        """Accept only values that start with ``http://`` or ``https://``.
+
+        Only the scheme prefix is checked; the rest of the URL is not parsed.
+        """
+        if isinstance(v, str) and v.startswith(("http://", "https://")):
+            return v
+        raise ValueError("config.url must be an http(s) URL")
+
+    @field_validator("tools_filter")
+    @classmethod
+    def validate_tools_filter(cls, v: list[str]) -> list[str]:
+        """Reject the allowlist if any entry is not a string."""
+        for tool_name in v:
+            if not isinstance(tool_name, str):
+                raise ValueError("config.tools_filter must be a list of strings")
+        return v
+
+
+class HttpApiToolDefinition(BaseModel):
+    """Tool definition for HTTP API tools."""
+
+    schema_version: int = Field(default=1, description="Schema version.")
+    # `type` is the discriminator that selects this model in the ToolDefinition union.
+    type: Literal["http_api"] = Field(description="Tool type.")
+    config: HttpApiConfig = Field(description="HTTP API configuration.")
+
+
+class EndCallToolDefinition(BaseModel):
+    """Tool definition for End Call tools."""
+
+    schema_version: int = Field(default=1, description="Schema version.")
+    # `type` is the discriminator that selects this model in the ToolDefinition union.
+    type: Literal["end_call"] = Field(description="Tool type.")
+    config: EndCallConfig = Field(description="End Call configuration.")
+
+
+class TransferCallToolDefinition(BaseModel):
+    """Tool definition for Transfer Call tools."""
+
+    schema_version: int = Field(default=1, description="Schema version.")
+    # `type` is the discriminator that selects this model in the ToolDefinition union.
+    type: Literal["transfer_call"] = Field(description="Tool type.")
+    config: TransferCallConfig = Field(description="Transfer Call configuration.")
+
+
+class CalculatorToolDefinition(BaseModel):
+    """Tool definition for Calculator tools."""
+
+    # Unlike the other definitions in this module, there is no `config` section.
+    schema_version: int = Field(default=1, description="Schema version.")
+    # `type` is the discriminator that selects this model in the ToolDefinition union.
+    type: Literal["calculator"] = Field(description="Tool type.")
+
+
+class McpToolDefinition(BaseModel):
+    """Persisted MCP tool definition."""
+
+    schema_version: int = Field(default=1, description="Schema version.")
+    # `type` is the discriminator that selects this model in the ToolDefinition union.
+    type: Literal["mcp"] = Field(description="Tool type.")
+    config: McpToolConfig = Field(description="MCP server configuration.")
+
+
+# Discriminated union of every typed tool definition; the `type` field of the
+# incoming payload selects which member model is validated.
+ToolDefinition = Annotated[
+    Union[
+        HttpApiToolDefinition,
+        EndCallToolDefinition,
+        TransferCallToolDefinition,
+        CalculatorToolDefinition,
+        McpToolDefinition,
+    ],
+    Field(discriminator="type"),
+]
+
+
+class CreateToolRequest(BaseModel):
+    """Request schema for creating a reusable tool."""
+
+    name: str = Field(
+        max_length=255,
+        description="Display name for the tool.",
+        json_schema_extra=_llm_hint(
+            "Use a concise action-oriented name; this influences the function "
+            "name shown to the agent."
+        ),
+    )
+    description: Optional[str] = Field(
+        default=None,
+        description="Description shown to the agent when deciding whether to call it.",
+        json_schema_extra=_llm_hint(
+            "State exactly when the agent should call the tool and what result it gets."
+        ),
+    )
+    category: ToolCategoryValue = Field(
+        default=ToolCategory.HTTP_API.value,
+        description="Tool category. Must match definition.type.",
+    )
+    icon: Optional[str] = Field(
+        default="globe", max_length=50, description="Lucide icon identifier."
+    )
+    icon_color: Optional[str] = Field(
+        default="#3B82F6", max_length=7, description="Hex color for the tool icon."
+    )
+    definition: ToolDefinition = Field(description="Typed tool definition.")
+
+    @model_validator(mode="before")
+    @classmethod
+    def default_category_from_definition(cls, data: Any) -> Any:
+        """Fill a missing/empty ``category`` from ``definition.type``.
+
+        Only dict payloads whose definition is itself a dict with a truthy
+        ``type`` are touched; a patched copy is returned and the input dict is
+        left unmodified. Everything else is passed through unchanged.
+        """
+        if not isinstance(data, dict) or data.get("category"):
+            return data
+
+        definition = data.get("definition")
+        inferred = definition.get("type") if isinstance(definition, dict) else None
+        if not inferred:
+            return data
+
+        patched = dict(data)
+        patched["category"] = inferred
+        return patched
+
+    @field_validator("category")
+    @classmethod
+    def validate_category(cls, v: str) -> str:
+        """Ensure the category is one of the ToolCategory enum values."""
+        allowed = [member.value for member in ToolCategory]
+        if v in allowed:
+            return v
+        raise ValueError(
+            f"Invalid category '{v}'. Must be one of: {', '.join(allowed)}"
+        )
+
+    @model_validator(mode="after")
+    def validate_category_matches_definition(self) -> "CreateToolRequest":
+        """Reject requests whose category differs from ``definition.type``."""
+        definition_type = self.definition.type
+        if self.category == definition_type:
+            return self
+        raise ValueError(
+            f"category '{self.category}' must match definition.type "
+            f"'{definition_type}'"
+        )
+
+
+class UpdateToolRequest(BaseModel):
+    """Request schema for updating a reusable tool."""
+
+    # Every field is optional and defaults to None. There is no `category`
+    # field on this schema.
+    name: Optional[str] = Field(default=None, max_length=255)
+    description: Optional[str] = None
+    icon: Optional[str] = Field(default=None, max_length=50)
+    icon_color: Optional[str] = Field(default=None, max_length=7)
+    definition: Optional[ToolDefinition] = None
+    # Free-form string; this schema does not restrict the allowed status values.
+    status: Optional[str] = None
+
+
+class CreatedByResponse(BaseModel):
+    """Response schema for the user who created a tool."""
+
+    # NOTE(review): presumably the internal user id and the auth provider's
+    # user id — confirm against the code that builds this response.
+    id: int
+    provider_id: str
+
+
+class ToolResponse(BaseModel):
+    """Response schema for a reusable tool."""
+
+    # Optional[...] fields without a default are still required keys: they may
+    # be None but must be supplied. Only `created_by` may be omitted.
+    id: int
+    tool_uuid: str
+    name: str
+    description: Optional[str]
+    category: str
+    icon: Optional[str]
+    icon_color: Optional[str]
+    status: str
+    # Returned as an untyped dict rather than the typed ToolDefinition union.
+    definition: Dict[str, Any]
+    created_at: datetime
+    updated_at: Optional[datetime]
+    created_by: Optional[CreatedByResponse] = None
+
+    # from_attributes lets the model be populated from attribute-bearing objects.
+    model_config = ConfigDict(from_attributes=True)
+
+
+class McpRefreshResponse(BaseModel):
+    """Result of re-discovering an MCP server's tool catalog."""
+
+    tool_uuid: str
+    # Untyped list; element shape is not validated by this schema.
+    discovered_tools: list = Field(default_factory=list)
+    # NOTE(review): presumably set when discovery failed — confirm with the route.
+    error: Optional[str] = None
--- a/api/schemas/user_configuration.py
+++ b/api/schemas/user_configuration.py
@ -1,33 +0,0 @@
-from datetime import datetime
-
-from pydantic import BaseModel, model_validator
-
-from api.services.configuration.registry import (
-    EmbeddingsConfig,
-    LLMConfig,
-    RealtimeConfig,
-    STTConfig,
-    TTSConfig,
-)
-
-
-class UserConfiguration(BaseModel):
-    llm: LLMConfig | None = None
-    stt: STTConfig | None = None
-    tts: TTSConfig | None = None
-    embeddings: EmbeddingsConfig | None = None
-    realtime: RealtimeConfig | None = None
-    is_realtime: bool = False
-    test_phone_number: str | None = None
-    timezone: str | None = None
-    last_validated_at: datetime | None = None
-
-    @model_validator(mode="before")
-    @classmethod
-    def strip_incomplete_realtime_when_disabled(cls, data):
-        """Skip realtime validation when is_realtime is False and api_key is missing."""
-        if isinstance(data, dict) and not data.get("is_realtime", False):
-            realtime = data.get("realtime")
-            if isinstance(realtime, dict) and not realtime.get("api_key"):
-                data.pop("realtime", None)
-        return data
--- a/api/schemas/workflow.py
+++ b/api/schemas/workflow.py
@ -15,7 +15,15 @@ class WorkflowRunResponseSchema(BaseModel):
    is_completed: bool
    transcript_url: str | None
    recording_url: str | None
+    user_recording_url: str | None = None
+    bot_recording_url: str | None = None
+    transcript_public_url: str | None = None
+    recording_public_url: str | None = None
+    user_recording_public_url: str | None = None
+    bot_recording_public_url: str | None = None
+    public_access_token: str | None = None
    cost_info: Dict[str, Any] | None
+    usage_info: Dict[str, Any] | None = None
    definition_id: int | None  # This is for backward compatibility
    initial_context: dict | None = None
    gathered_context: dict | None = None
--- a/api/services/auth/depends.py
+++ b/api/services/auth/depends.py
@ -1,7 +1,7 @@
 from typing import Annotated, Optional

 import httpx
-from fastapi import Header, HTTPException, Query, WebSocket
+from fastapi import Depends, Header, HTTPException, Query, WebSocket
 from loguru import logger
 from pydantic import ValidationError

@ -9,12 +9,20 @@ from api.constants import AUTH_PROVIDER, DOGRAH_MPS_SECRET_KEY, MPS_API_URL
 from api.db import db_client
 from api.db.models import UserModel
 from api.enums import PostHogEvent
-from api.schemas.user_configuration import UserConfiguration
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
 from api.services.auth.stack_auth import stackauth
 from api.services.configuration.registry import ServiceProviders
-from api.services.posthog_client import capture_event
+from api.services.mps_billing import ensure_hosted_mps_billing_account_v2
+from api.services.posthog_client import (
+    capture_event,
+    group_identify,
+    set_person_properties,
+)
 from api.utils.auth import decode_jwt_token

+POSTHOG_ORGANIZATION_GROUP_TYPE = "organization"
+POSTHOG_ORGANIZATION_USES_MPS_BILLING_V2_PROPERTY = "uses_mps_billing_v2"
+

 async def get_user(
    authorization: Annotated[str | None, Header()] = None,
@ -93,6 +101,11 @@ async def get_user(
        ) = await db_client.get_or_create_organization_by_provider_id(
            org_provider_id=selected_team_id, user_id=user_model.id
        )
+        if org_was_created:
+            _sync_created_organization_to_posthog(
+                organization=organization,
+                stack_user=stack_user,
+            )

        # Check if user's selected organization differs from the current organization
        if user_model.selected_organization_id != organization.id:
@ -106,10 +119,30 @@ async def get_user(
            # Update the user_model object to reflect the change
            user_model.selected_organization_id = organization.id

+            _associate_user_with_posthog_organization(
+                user=user_model,
+                organization=organization,
+                stack_user=stack_user,
+                org_was_created=org_was_created,
+            )
+
            # Only create default configuration if organization was just created
            # This prevents race conditions where multiple concurrent requests
            # might try to create configurations
            if org_was_created:
+                try:
+                    await ensure_hosted_mps_billing_account_v2(
+                        organization.id,
+                        created_by=str(stack_user["id"]),
+                    )
+                except Exception:
+                    logger.warning(
+                        "Failed to initialize hosted MPS billing account for "
+                        "organization {}",
+                        organization.id,
+                        exc_info=True,
+                    )
+
                existing_cfg = await db_client.get_user_configurations(user_model.id)
                if not (existing_cfg.llm or existing_cfg.tts or existing_cfg.stt):
                    mps_config = await create_user_configuration_with_mps_key(
@ -119,6 +152,19 @@ async def get_user(
                        await db_client.update_user_configuration(
                            user_model.id, mps_config
                        )
+                        from api.enums import OrganizationConfigurationKey
+                        from api.services.configuration.ai_model_configuration import (
+                            convert_legacy_ai_model_configuration_to_v2,
+                        )
+
+                        model_config_v2 = convert_legacy_ai_model_configuration_to_v2(
+                            mps_config
+                        )
+                        await db_client.upsert_configuration(
+                            organization.id,
+                            OrganizationConfigurationKey.MODEL_CONFIGURATION_V2.value,
+                            model_config_v2.model_dump(mode="json", exclude_none=True),
+                        )

    except Exception as exc:
        raise HTTPException(
@ -129,6 +175,154 @@ async def get_user(
    return user_model


+def _sync_created_organization_to_posthog(
+    *,
+    organization,
+    stack_user: dict | None = None,
+    created_by_provider_id: str | None = None,
+    uses_mps_billing_v2: bool | None = None,
+) -> None:
+    """Publish a freshly created organization to PostHog.
+
+    Identifies the organization group and, when a creator id can be
+    resolved, also captures an ORGANIZATION_CREATED event for that creator.
+
+    Args:
+        organization: Object exposing ``id`` and, optionally, ``provider_id``.
+        stack_user: Optional mapping; its ``"id"`` entry is the fallback
+            creator id when ``created_by_provider_id`` is None.
+        created_by_provider_id: Explicit creator id; wins over ``stack_user``.
+        uses_mps_billing_v2: When not None, stored as a group property.
+
+    Any exception raised here is logged and suppressed, not propagated.
+    """
+    try:
+        org_id = int(organization.id)
+        org_provider_id = getattr(organization, "provider_id", None)
+
+        # Resolve who created the org: explicit id first, then the stack user.
+        creator = created_by_provider_id
+        if creator is None and stack_user and stack_user.get("id"):
+            creator = str(stack_user["id"])
+
+        group_props = {
+            "organization_id": org_id,
+            "organization_provider_id": org_provider_id,
+            "auth_provider": "stack",
+        }
+        if creator:
+            group_props["created_by_provider_id"] = creator
+        if uses_mps_billing_v2 is not None:
+            group_props[POSTHOG_ORGANIZATION_USES_MPS_BILLING_V2_PROPERTY] = (
+                uses_mps_billing_v2
+            )
+
+        group_identify(
+            POSTHOG_ORGANIZATION_GROUP_TYPE,
+            str(org_id),
+            group_props,
+            distinct_id=creator,
+        )
+
+        # The creation event needs a distinct id; skip it when none is known.
+        if not creator:
+            return
+        capture_event(
+            distinct_id=creator,
+            event=PostHogEvent.ORGANIZATION_CREATED,
+            properties=group_props,
+            groups={POSTHOG_ORGANIZATION_GROUP_TYPE: str(org_id)},
+        )
+    except Exception:
+        logger.exception("Failed to sync created organization to PostHog")
+
+
+def _sync_posthog_organization_group_properties(
+    *,
+    organization,
+    uses_mps_billing_v2: bool | None = None,
+) -> None:
+    """Refresh the PostHog organization group's properties.
+
+    Calls ``group_identify`` without a distinct id. The billing flag is only
+    included when ``uses_mps_billing_v2`` is not None. Failures are logged
+    and suppressed.
+    """
+    try:
+        org_id = int(organization.id)
+        group_props = {
+            "organization_id": org_id,
+            "organization_provider_id": getattr(organization, "provider_id", None),
+            "auth_provider": "stack",
+        }
+        billing_flag_known = uses_mps_billing_v2 is not None
+        if billing_flag_known:
+            group_props[POSTHOG_ORGANIZATION_USES_MPS_BILLING_V2_PROPERTY] = (
+                uses_mps_billing_v2
+            )
+
+        group_identify(POSTHOG_ORGANIZATION_GROUP_TYPE, str(org_id), group_props)
+    except Exception:
+        logger.exception("Failed to sync organization group properties to PostHog")
+
+
+def _sync_posthog_organization_mps_billing_v2_status(
+    organization_id: int,
+    *,
+    uses_mps_billing_v2: bool,
+) -> None:
+    """Push only the MPS billing v2 flag onto the PostHog organization group.
+
+    ``organization_id`` is coerced through ``int`` before being used as the
+    group key. Failures are logged and suppressed.
+    """
+    try:
+        group_key = str(int(organization_id))
+        billing_props = {
+            POSTHOG_ORGANIZATION_USES_MPS_BILLING_V2_PROPERTY: uses_mps_billing_v2
+        }
+        group_identify(POSTHOG_ORGANIZATION_GROUP_TYPE, group_key, billing_props)
+    except Exception:
+        logger.exception("Failed to sync organization billing status to PostHog")
+
+
+def _associate_user_with_posthog_organization(
+    *,
+    user: UserModel,
+    organization,
+    stack_user: dict | None = None,
+    user_distinct_id: str | None = None,
+    org_was_created: bool,
+    organization_ids: list[int] | None = None,
+    selected_organization_id: int | None = None,
+    selected_organization_provider_id: str | None = None,
+) -> None:
+    """Record in PostHog that a user belongs to an organization.
+
+    Sets person properties for the user and captures an
+    ORGANIZATION_USER_ASSOCIATED event grouped under the organization.
+
+    The distinct id is, in order of preference: ``user_distinct_id``, the
+    ``"id"`` entry of ``stack_user``, then ``user.provider_id``. The selected
+    organization id / provider id fall back to the organization's own values
+    when the explicit arguments are falsy. Failures are logged and suppressed.
+    """
+    try:
+        org_id = int(organization.id)
+        org_provider_id = getattr(organization, "provider_id", None)
+
+        distinct_id = user_distinct_id
+        if distinct_id is None:
+            has_stack_id = stack_user and stack_user.get("id")
+            distinct_id = (
+                str(stack_user["id"]) if has_stack_id else str(user.provider_id)
+            )
+
+        person_props = {
+            "user_id": user.id,
+            "user_provider_id": distinct_id,
+            "selected_organization_id": selected_organization_id or org_id,
+            "selected_organization_provider_id": (
+                selected_organization_provider_id or org_provider_id
+            ),
+        }
+        if organization_ids is not None:
+            person_props["organization_ids"] = organization_ids
+        if user.email:
+            person_props["email"] = user.email
+        set_person_properties(distinct_id, person_props)
+
+        capture_event(
+            distinct_id=distinct_id,
+            event=PostHogEvent.ORGANIZATION_USER_ASSOCIATED,
+            properties={
+                "user_id": user.id,
+                "organization_id": org_id,
+                "organization_provider_id": org_provider_id,
+                "auth_provider": "stack",
+                "organization_was_created": org_was_created,
+            },
+            groups={POSTHOG_ORGANIZATION_GROUP_TYPE: str(org_id)},
+        )
+    except Exception:
+        logger.exception("Failed to associate user with PostHog organization")
+
+
+async def get_user_with_selected_organization(
+    user: Annotated[UserModel, Depends(get_user)],
+) -> UserModel:
+    """Dependency that returns the user only if an organization is selected.
+
+    Raises:
+        HTTPException: 400 when ``user.selected_organization_id`` is falsy.
+    """
+    if user.selected_organization_id:
+        return user
+    raise HTTPException(status_code=400, detail="No organization selected")
+
+
 async def _handle_oss_auth(authorization: str | None) -> UserModel:
    """
    Handle authentication for OSS deployment mode.
@ -192,7 +386,7 @@ async def _handle_api_key_auth(api_key: str) -> UserModel:

 async def create_user_configuration_with_mps_key(
    user_id: int, organization_id: int, user_provider_id: str
-) -> Optional[UserConfiguration]:
+) -> Optional[EffectiveAIModelConfiguration]:
    """Create user configuration using MPS service key.

    Args:
@ -201,7 +395,7 @@ async def create_user_configuration_with_mps_key(
        user_provider_id: The user's provider ID (for created_by field)

    Returns:
-        UserConfiguration with MPS-provided API keys or None if failed
+        EffectiveAIModelConfiguration with MPS-provided API keys or None if failed
    """

    async with httpx.AsyncClient() as client:
@ -211,7 +405,7 @@ async def create_user_configuration_with_mps_key(
            response = await client.post(
                f"{MPS_API_URL}/api/v1/service-keys/",
                json={
-                    "name": f"Default Dograh Model Service Key",
+                    "name": "Default Dograh Model Service Key",
                    "description": "Auto-generated key for OSS user",
                    "expires_in_days": 7,  # Short-lived for OSS
                    "created_by": user_provider_id,
@ -229,7 +423,7 @@ async def create_user_configuration_with_mps_key(
            response = await client.post(
                f"{MPS_API_URL}/api/v1/service-keys/",
                json={
-                    "name": f"Default Dograh Model Service Key",
+                    "name": "Default Dograh Model Service Key",
                    "description": f"Auto-generated key for organization {organization_id}",
                    "organization_id": organization_id,
                    "expires_in_days": 90,  # Longer-lived for authenticated users
@ -263,9 +457,14 @@ async def create_user_configuration_with_mps_key(
                        "api_key": [service_key],
                        "model": "default",
                    },
+                    "embeddings": {
+                        "provider": ServiceProviders.DOGRAH.value,
+                        "api_key": [service_key],
+                        "model": "dograh_embedding_v1",
+                    },
                }
-                user_config = UserConfiguration(**configuration)
-                return user_config
+                effective_config = EffectiveAIModelConfiguration(**configuration)
+                return effective_config
        else:
            logger.warning(
                f"Failed to get MPS service key: {response.status_code} - {response.text}"
--- a/api/services/campaign/campaign_call_dispatcher.py
+++ b/api/services/campaign/campaign_call_dispatcher.py
@ -15,6 +15,7 @@ from api.services.campaign.errors import (
    PhoneNumberPoolExhaustedError,
 )
 from api.services.campaign.rate_limiter import rate_limiter
+from api.services.quota_service import authorize_workflow_run_start
 from api.utils.common import get_backend_endpoints

 if TYPE_CHECKING:
@ -108,6 +109,7 @@ class CampaignCallDispatcher:
            logger.warning(f"Failed to initialize from_number pool: {e}")

        processed_count = 0
+        processed_run_ids: set[int] = set()
        for i, queued_run in enumerate(queued_runs):
            try:
                # Apply rate limiting, i.e lets not initiate more than rate_limit_per_second
@ -133,28 +135,48 @@ class CampaignCallDispatcher:
                )

                processed_count += 1
+                processed_run_ids.add(queued_run.id)

                # Update campaign processed count
                await db_client.update_campaign(
                    campaign_id=campaign_id, processed_rows=campaign.processed_rows + 1
                )

-            except (ConcurrentSlotAcquisitionError, PhoneNumberPoolExhaustedError):
-                # Revert all unprocessed runs (current and remaining) back to queued
-                # so they can be picked up again when campaign is resumed
-                for unprocessed_run in queued_runs[i:]:
-                    try:
-                        await db_client.update_queued_run(
-                            queued_run_id=unprocessed_run.id,
-                            state="queued",
-                        )
-                        logger.info(
-                            f"Reverted queued run {unprocessed_run.id} back to queued state"
-                        )
-                    except Exception as revert_error:
-                        logger.error(
-                            f"Failed to revert queued run {unprocessed_run.id}: {revert_error}"
-                        )
+            except asyncio.CancelledError:
+                logger.warning(
+                    f"Campaign {campaign_id} batch cancelled; returning claimed "
+                    "queued runs that were not dispatched"
+                )
+                await self._return_unprocessed_claims(
+                    queued_runs, processed_run_ids, reason="task_cancelled"
+                )
+                raise
+
+            except PhoneNumberPoolExhaustedError as e:
+                logger.warning(
+                    f"Phone number pool exhausted for campaign {campaign_id}; "
+                    "returning claimed queued runs that were not dispatched: "
+                    f"{e}"
+                )
+                await self._return_unprocessed_claims(
+                    queued_runs,
+                    processed_run_ids,
+                    reason="phone_number_pool_exhausted",
+                )
+                # Re-raise to propagate to process_campaign_batch
+                raise
+
+            except ConcurrentSlotAcquisitionError as e:
+                logger.warning(
+                    f"Concurrent slot acquisition failed for campaign {campaign_id}; "
+                    "returning claimed queued runs that were not dispatched: "
+                    f"{e}"
+                )
+                await self._return_unprocessed_claims(
+                    queued_runs,
+                    processed_run_ids,
+                    reason="concurrent_slot_acquisition_failed",
+                )
                # Re-raise to propagate to process_campaign_batch
                raise

@ -178,6 +200,38 @@ class CampaignCallDispatcher:

        return processed_count

+    async def _return_unprocessed_claims(
+        self,
+        queued_runs: list[QueuedRunModel],
+        processed_run_ids: set[int],
+        *,
+        reason: str,
+    ) -> None:
+        """Hand back claimed queued runs that were never dispatched.
+
+        Collects the ids of ``queued_runs`` not present in
+        ``processed_run_ids`` and passes them to
+        ``db_client.return_processing_queued_runs_without_workflow``.
+        ``reason`` is used only in log messages. Does nothing when every run
+        was processed. A failure of the DB call is logged and not re-raised.
+        """
+        queued_run_ids = []
+        for candidate in queued_runs:
+            if candidate.id in processed_run_ids:
+                continue
+            queued_run_ids.append(candidate.id)
+
+        if queued_run_ids:
+            try:
+                returned_count = (
+                    await db_client.return_processing_queued_runs_without_workflow(
+                        queued_run_ids
+                    )
+                )
+                logger.info(
+                    f"Returned {returned_count}/{len(queued_run_ids)} claimed queued runs "
+                    f"back to queued state; reason={reason}; "
+                    f"queued_run_ids={queued_run_ids}"
+                )
+            except Exception as revert_error:
+                logger.error(
+                    f"Failed to return claimed queued runs; reason={reason}; "
+                    f"queued_run_ids={queued_run_ids}; error={revert_error}"
+                )
+
    async def dispatch_call(
        self, queued_run: QueuedRunModel, campaign: any, slot_id: str
    ) -> Optional[WorkflowRunModel]:
@ -286,6 +340,41 @@ class CampaignCallDispatcher:
                },
            )

+        quota_result = await authorize_workflow_run_start(
+            workflow_id=campaign.workflow_id,
+            workflow_run_id=workflow_run.id,
+        )
+        if not quota_result.has_quota:
+            error_message = quota_result.error_message or "Quota exceeded"
+            logger.warning(
+                f"Campaign {campaign.id} quota check failed for workflow run "
+                f"{workflow_run.id}: {error_message}"
+            )
+            await db_client.update_workflow_run(
+                run_id=workflow_run.id,
+                is_completed=True,
+                state=WorkflowRunState.COMPLETED.value,
+                gathered_context={"error": error_message},
+            )
+
+            mapping = await rate_limiter.get_workflow_slot_mapping(workflow_run.id)
+            if mapping:
+                org_id, mapped_slot_id = mapping
+                await rate_limiter.release_concurrent_slot(org_id, mapped_slot_id)
+                await rate_limiter.delete_workflow_slot_mapping(workflow_run.id)
+
+            from_number_mapping = await rate_limiter.get_workflow_from_number_mapping(
+                workflow_run.id
+            )
+            if from_number_mapping:
+                fn_org_id, fn_number, fn_tcid = from_number_mapping
+                await rate_limiter.release_from_number(
+                    fn_org_id, fn_number, telephony_configuration_id=fn_tcid
+                )
+                await rate_limiter.delete_workflow_from_number_mapping(workflow_run.id)
+
+            raise ValueError(error_message)
+
        # Initiate call via telephony provider
        try:
            # Construct webhook URL with parameters
--- a/api/services/configuration/ai_model_configuration.py
+++ b/api/services/configuration/ai_model_configuration.py
@ -0,0 +1,490 @@
+from __future__ import annotations
+
+import copy
+from dataclasses import dataclass
+from typing import Literal
+
+from loguru import logger
+from pydantic import ValidationError
+from sqlalchemy import select, update
+from sqlalchemy.orm import selectinload
+
+from api.constants import MPS_API_URL
+from api.db import db_client
+from api.db.models import WorkflowDefinitionModel, WorkflowModel
+from api.enums import OrganizationConfigurationKey
+from api.schemas.ai_model_configuration import (
+    DOGRAH_DEFAULT_LANGUAGE,
+    DOGRAH_DEFAULT_VOICE,
+    DOGRAH_SPEED_MAX,
+    DOGRAH_SPEED_MIN,
+    BYOKAIModelConfiguration,
+    BYOKPipelineAIModelConfiguration,
+    BYOKRealtimeAIModelConfiguration,
+    DograhManagedAIModelConfiguration,
+    EffectiveAIModelConfiguration,
+    OrganizationAIModelConfigurationV2,
+    compile_ai_model_configuration_v2,
+)
+from api.services.configuration.masking import (
+    SERVICE_SECRET_FIELDS,
+    contains_masked_key,
+    mask_key,
+    resolve_masked_api_keys,
+)
+from api.services.configuration.registry import ServiceProviders
+from api.services.configuration.resolve import resolve_effective_config
+
+AIModelConfigurationSource = Literal["organization_v2", "legacy_user_v1", "empty"]
+WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY = "model_configuration_v2_override"
+
+
+@dataclass
+class ResolvedAIModelConfiguration:
+    """Outcome of resolving which AI model configuration applies."""
+
+    # The configuration to actually use.
+    effective: EffectiveAIModelConfiguration
+    # Where ``effective`` came from: "organization_v2", "legacy_user_v1" or "empty".
+    source: AIModelConfigurationSource
+    # The stored org-level v2 configuration; populated by the resolver only
+    # when ``source`` is "organization_v2".
+    organization_configuration: OrganizationAIModelConfigurationV2 | None = None
+
+
+@dataclass
+class WorkflowAIModelConfigurationMigrationResult:
+    """Summary returned by ``migrate_workflow_model_configurations_to_v2``."""
+
+    # Number of distinct workflows touched (the workflow row itself or any of
+    # its definitions changed).
+    workflow_count: int = 0
+    # Number of workflow definitions whose configuration was rewritten.
+    definition_count: int = 0
+    # Sorted ids of the touched workflows; None when not provided.
+    workflow_ids: list[int] | None = None
+
+
+async def get_resolved_ai_model_configuration(
+    *,
+    user_id: int | None,
+    organization_id: int | None,
+) -> ResolvedAIModelConfiguration:
+    """Resolve the AI model configuration for a user/organization pair.
+
+    Precedence:
+      1. A stored organization v2 configuration (compiled to its effective
+         form) -> source "organization_v2".
+      2. With no ``user_id``: an empty effective configuration -> "empty".
+      3. The user's legacy configuration -> "legacy_user_v1" if it defines at
+         least one model service, otherwise "empty".
+    """
+    org_config = await get_organization_ai_model_configuration_v2(organization_id)
+    if org_config is not None:
+        compiled = compile_ai_model_configuration_v2(org_config)
+        return ResolvedAIModelConfiguration(
+            effective=compiled,
+            source="organization_v2",
+            organization_configuration=org_config,
+        )
+
+    if user_id is None:
+        empty_config = EffectiveAIModelConfiguration()
+        return ResolvedAIModelConfiguration(effective=empty_config, source="empty")
+
+    legacy_config = await db_client.get_user_configurations(user_id)
+    if _has_model_services(legacy_config):
+        origin = "legacy_user_v1"
+    else:
+        origin = "empty"
+    return ResolvedAIModelConfiguration(effective=legacy_config, source=origin)
+
+
+async def get_effective_ai_model_configuration_for_workflow(
+    *,
+    user_id: int | None,
+    organization_id: int | None,
+    workflow_configurations: dict | None,
+) -> EffectiveAIModelConfiguration:
+    """Compute the effective AI model configuration for one workflow.
+
+    A truthy v2 override stored in ``workflow_configurations`` is validated,
+    compiled and returned on its own. Otherwise the resolved user/org
+    configuration is combined with the workflow's ``"model_overrides"``
+    entry via ``resolve_effective_config``.
+    """
+    configs = workflow_configurations or {}
+    override_payload = configs.get(WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY)
+
+    if not override_payload:
+        resolved = await get_resolved_ai_model_configuration(
+            user_id=user_id,
+            organization_id=organization_id,
+        )
+        legacy_overrides = configs.get("model_overrides")
+        return resolve_effective_config(resolved.effective, legacy_overrides)
+
+    validated_override = OrganizationAIModelConfigurationV2.model_validate(
+        override_payload
+    )
+    return compile_ai_model_configuration_v2(validated_override)
+
+
+async def get_organization_ai_model_configuration_v2(
+    organization_id: int | None,
+) -> OrganizationAIModelConfigurationV2 | None:
+    """Load and validate the organization's stored v2 model configuration.
+
+    Returns None when ``organization_id`` is None, when no row or an empty
+    value is stored, or when the stored value fails validation (in which
+    case a warning is logged).
+    """
+    if organization_id is None:
+        return None
+
+    stored = await db_client.get_configuration(
+        organization_id,
+        OrganizationConfigurationKey.MODEL_CONFIGURATION_V2.value,
+    )
+    if stored is None or not stored.value:
+        return None
+
+    try:
+        parsed = OrganizationAIModelConfigurationV2.model_validate(stored.value)
+    except ValidationError as exc:
+        logger.warning(
+            "Invalid org AI model configuration v2 for organization "
+            f"{organization_id}: {exc}. Falling back to legacy configuration."
+        )
+        return None
+    return parsed
+
+
+async def upsert_organization_ai_model_configuration_v2(
+    organization_id: int,
+    configuration: OrganizationAIModelConfigurationV2,
+) -> OrganizationAIModelConfigurationV2:
+    """Store the organization's v2 model configuration and return it.
+
+    The configuration is serialized in JSON mode with None fields omitted
+    and upserted under the MODEL_CONFIGURATION_V2 key.
+    """
+    config_key = OrganizationConfigurationKey.MODEL_CONFIGURATION_V2.value
+    serialized = configuration.model_dump(mode="json", exclude_none=True)
+    await db_client.upsert_configuration(organization_id, config_key, serialized)
+    return configuration
+
+
+async def migrate_workflow_model_configurations_to_v2(
+    *,
+    organization_id: int,
+    fallback_user_config: EffectiveAIModelConfiguration,
+) -> WorkflowAIModelConfigurationMigrationResult:
+    """Migrate legacy model overrides of an organization's workflows to v2.
+
+    Every workflow of the organization, and each of its definitions, has its
+    ``workflow_configurations`` passed through
+    ``migrate_workflow_configuration_model_override_to_v2``. Changed
+    configurations are written back in one session with a single commit.
+
+    Args:
+        organization_id: Organization whose workflows are migrated.
+        fallback_user_config: Base configuration used for workflows that
+            have no ``user_id``.
+
+    Returns:
+        Counts of touched workflows and definitions plus the sorted ids of
+        the touched workflows.
+    """
+    workflows = await _list_workflows_for_model_configuration_migration(organization_id)
+    # Cache of per-owner user configurations so each owner is fetched once.
+    owner_configs: dict[int, EffectiveAIModelConfiguration] = {}
+    workflow_updates: list[tuple[int, dict]] = []
+    definition_updates: list[tuple[int, dict]] = []
+    migrated_workflow_ids: set[int] = set()
+
+    for workflow in workflows:
+        # Use the workflow owner's configuration as the base when there is an
+        # owner; otherwise fall back to the caller-supplied configuration.
+        base_config = fallback_user_config
+        if workflow.user_id is not None:
+            if workflow.user_id not in owner_configs:
+                owner_configs[
+                    workflow.user_id
+                ] = await db_client.get_user_configurations(workflow.user_id)
+            base_config = owner_configs[workflow.user_id]
+
+        workflow_configs, workflow_changed = (
+            migrate_workflow_configuration_model_override_to_v2(
+                workflow.workflow_configurations,
+                base_config,
+            )
+        )
+        if workflow_changed:
+            workflow_updates.append((workflow.id, workflow_configs))
+            migrated_workflow_ids.add(workflow.id)
+
+        # Definitions carry their own configuration copy and are migrated
+        # with the same base configuration as their workflow.
+        for definition in workflow.definitions:
+            definition_configs, definition_changed = (
+                migrate_workflow_configuration_model_override_to_v2(
+                    definition.workflow_configurations,
+                    base_config,
+                )
+            )
+            if definition_changed:
+                definition_updates.append((definition.id, definition_configs))
+                # A changed definition also counts its parent workflow as migrated.
+                migrated_workflow_ids.add(workflow.id)
+
+    # Persist all collected changes in one session and one commit.
+    if workflow_updates or definition_updates:
+        async with db_client.async_session() as session:
+            for workflow_id, workflow_configs in workflow_updates:
+                await session.execute(
+                    update(WorkflowModel)
+                    .where(WorkflowModel.id == workflow_id)
+                    .values(workflow_configurations=workflow_configs)
+                )
+            for definition_id, definition_configs in definition_updates:
+                await session.execute(
+                    update(WorkflowDefinitionModel)
+                    .where(WorkflowDefinitionModel.id == definition_id)
+                    .values(workflow_configurations=definition_configs)
+                )
+            await session.commit()
+
+    return WorkflowAIModelConfigurationMigrationResult(
+        workflow_count=len(migrated_workflow_ids),
+        definition_count=len(definition_updates),
+        workflow_ids=sorted(migrated_workflow_ids),
+    )
+
+
+def migrate_workflow_configuration_model_override_to_v2(
+    workflow_configurations: dict | None,
+    base_config: EffectiveAIModelConfiguration,
+) -> tuple[dict, bool]:
+    """Rewrite a legacy ``model_overrides`` entry as a v2 override.
+
+    Works on a deep copy; the input mapping is never mutated.
+
+    Returns:
+        ``(configurations, changed)``. Non-dict input yields ``({}, False)``.
+        The ``model_overrides`` key is always removed from the result when
+        present. When it held a dict and no v2 override existed yet, the
+        overrides are resolved against ``base_config`` and stored under the
+        v2 override key.
+    """
+    if not isinstance(workflow_configurations, dict):
+        return {}, False
+
+    result = copy.deepcopy(workflow_configurations)
+    legacy_overrides = result.get("model_overrides")
+    current_v2_override = result.get(WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY)
+
+    if isinstance(legacy_overrides, dict):
+        # Never clobber a v2 override that is already in place.
+        if not current_v2_override:
+            resolved = resolve_effective_config(base_config, legacy_overrides)
+            converted = convert_legacy_ai_model_configuration_to_v2(resolved)
+            result[WORKFLOW_MODEL_CONFIGURATION_V2_OVERRIDE_KEY] = (
+                converted.model_dump(mode="json", exclude_none=True)
+            )
+        result.pop("model_overrides", None)
+        return result, True
+
+    # A non-dict legacy entry (e.g. None) is simply dropped when present.
+    had_legacy_key = "model_overrides" in result
+    result.pop("model_overrides", None)
+    return result, had_legacy_key
+
+
+def merge_ai_model_configuration_v2_secrets(
+    incoming: OrganizationAIModelConfigurationV2,
+    existing: OrganizationAIModelConfigurationV2 | None,
+) -> OrganizationAIModelConfigurationV2:
+    """Fill masked secrets in ``incoming`` from the ``existing`` configuration.
+
+    With no existing configuration ``incoming`` is returned untouched.
+    Secrets are only merged when both configurations use the same top-level
+    mode ("dograh" or "byok"); the merged data is re-validated on return.
+    """
+    if existing is None:
+        return incoming
+
+    merged = incoming.model_dump(mode="json", exclude_none=True)
+    stored = existing.model_dump(mode="json", exclude_none=True)
+    incoming_mode = merged.get("mode")
+    existing_mode = stored.get("mode")
+
+    if incoming_mode == "dograh" and existing_mode == "dograh":
+        new_dograh = merged.get("dograh") or {}
+        old_dograh = stored.get("dograh") or {}
+        new_key = new_dograh.get("api_key")
+        old_key = old_dograh.get("api_key")
+        # Only a masked incoming key is swapped for the stored one.
+        if new_key and old_key and contains_masked_key(new_key):
+            new_dograh["api_key"] = resolve_masked_api_keys(new_key, old_key)
+    elif incoming_mode == "byok" and existing_mode == "byok":
+        _merge_byok_secret_fields(merged.get("byok"), stored.get("byok"))
+
+    return OrganizationAIModelConfigurationV2.model_validate(merged)
+
+
+def check_for_masked_keys_in_ai_model_configuration_v2(
+    configuration: OrganizationAIModelConfigurationV2,
+) -> None:
+    """Reject a configuration that still contains masked secret values.
+
+    Raises:
+        ValueError: Propagated from ``_raise_if_masked_secret`` when a secret
+            field of the serialized configuration holds a masked value.
+    """
+    _raise_if_masked_secret(
+        configuration.model_dump(mode="json", exclude_none=True)
+    )
+
+
+def mask_ai_model_configuration_v2(
+    configuration: OrganizationAIModelConfigurationV2 | None,
+) -> dict | None:
+    """Serialize a configuration with its secret fields masked.
+
+    Returns None for a None configuration; otherwise a JSON-mode dict (None
+    fields omitted) whose secret fields were masked in place.
+    """
+    if configuration is not None:
+        masked = configuration.model_dump(mode="json", exclude_none=True)
+        _mask_secret_fields(masked)
+        return masked
+    return None
+
+
+def convert_legacy_ai_model_configuration_to_v2(
+    configuration: EffectiveAIModelConfiguration,
+) -> OrganizationAIModelConfigurationV2:
+    """Convert a legacy effective configuration into the v2 schema.
+
+    A configuration with any usable Dograh API key becomes a "dograh" mode
+    configuration. Otherwise it becomes "byok": realtime when
+    ``configuration.is_realtime`` is truthy, pipeline if not.
+
+    Raises:
+        ValueError: When the services required for the chosen BYOK mode are
+            missing (realtime + llm, or llm + tts + stt).
+    """
+    dograh_key = _first_dograh_api_key(configuration)
+    if dograh_key:
+        return _convert_any_dograh_legacy_configuration(configuration, dograh_key)
+
+    if configuration.is_realtime:
+        if configuration.realtime is None or configuration.llm is None:
+            raise ValueError("Realtime legacy configuration is incomplete")
+        byok = BYOKAIModelConfiguration(
+            mode="realtime",
+            realtime=BYOKRealtimeAIModelConfiguration(
+                realtime=configuration.realtime,
+                llm=configuration.llm,
+                embeddings=configuration.embeddings,
+            ),
+        )
+    else:
+        required_services = (configuration.llm, configuration.tts, configuration.stt)
+        if any(service is None for service in required_services):
+            raise ValueError("Pipeline legacy configuration is incomplete")
+        byok = BYOKAIModelConfiguration(
+            mode="pipeline",
+            pipeline=BYOKPipelineAIModelConfiguration(
+                llm=configuration.llm,
+                tts=configuration.tts,
+                stt=configuration.stt,
+                embeddings=configuration.embeddings,
+            ),
+        )
+
+    return OrganizationAIModelConfigurationV2(mode="byok", byok=byok)
+
+
+def dograh_embeddings_base_url() -> str:
+    """Return the MPS base URL to hand to the embeddings client."""
+    # The OpenAI async client adds "/embeddings" itself, and MPS serves that
+    # route beneath /api/v1/llm.
+    llm_root = f"{MPS_API_URL}/api/v1/llm"
+    return llm_root
+
+
+def apply_managed_embeddings_base_url(
+    *,
+    provider: str | None,
+    base_url: str | None,
+) -> str | None:
+    """Pick the embeddings base URL for a provider.
+
+    The Dograh provider (matched as enum member or as its string value)
+    always gets the managed MPS URL; any other provider keeps ``base_url``.
+    """
+    dograh_markers = (ServiceProviders.DOGRAH.value, ServiceProviders.DOGRAH)
+    if provider in dograh_markers:
+        return dograh_embeddings_base_url()
+    return base_url
+
+
+def _merge_byok_secret_fields(incoming_byok: dict | None, existing_byok: dict | None):
+    """Merge stored secrets into the incoming BYOK section, in place.
+
+    Nothing happens unless both arguments are dicts with the same ``"mode"``
+    and both hold a dict under that mode's key. Each service section that is
+    a dict on both sides is passed to ``_merge_service_secret_fields``.
+    """
+    if not (isinstance(incoming_byok, dict) and isinstance(existing_byok, dict)):
+        return
+
+    incoming_mode = incoming_byok.get("mode")
+    existing_mode = existing_byok.get("mode")
+    if incoming_mode != existing_mode:
+        return
+
+    incoming_services = incoming_byok.get(incoming_mode)
+    existing_services = existing_byok.get(existing_mode)
+    if not (
+        isinstance(incoming_services, dict) and isinstance(existing_services, dict)
+    ):
+        return
+
+    if incoming_mode == "pipeline":
+        service_names = ("llm", "tts", "stt", "embeddings")
+    else:
+        service_names = ("realtime", "llm", "embeddings")
+
+    for service_name in service_names:
+        new_section = incoming_services.get(service_name)
+        old_section = existing_services.get(service_name)
+        if isinstance(new_section, dict) and isinstance(old_section, dict):
+            _merge_service_secret_fields(new_section, old_section)
+
+
+async def _list_workflows_for_model_configuration_migration(
+    organization_id: int,
+) -> list[WorkflowModel]:
+    """Fetch all workflows of an organization with definitions eagerly loaded."""
+    query = (
+        select(WorkflowModel)
+        .options(selectinload(WorkflowModel.definitions))
+        .where(WorkflowModel.organization_id == organization_id)
+    )
+    async with db_client.async_session() as session:
+        rows = await session.execute(query)
+        workflows = rows.scalars().unique().all()
+        return list(workflows)
+
+
+def _merge_service_secret_fields(incoming: dict, existing: dict):
+    """Carry stored secrets over into one incoming service section, in place.
+
+    Skipped entirely when both sections name a provider and the providers
+    differ. For each secret field present in ``existing``: a missing/None
+    incoming value takes the stored one, and a masked incoming value is
+    resolved against the stored one. Other incoming values are kept.
+    """
+    new_provider = incoming.get("provider")
+    old_provider = existing.get("provider")
+    both_named = new_provider is not None and old_provider is not None
+    if both_named and new_provider != old_provider:
+        return
+
+    for field_name in SERVICE_SECRET_FIELDS:
+        if field_name in existing:
+            supplied = incoming.get(field_name)
+            stored = existing[field_name]
+            if supplied is None:
+                incoming[field_name] = stored
+            elif contains_masked_key(supplied):
+                incoming[field_name] = resolve_masked_api_keys(supplied, stored)
+
+
+def _raise_if_masked_secret(value):
+    """Walk nested dicts/lists and fail on the first masked secret field.
+
+    Raises:
+        ValueError: When a dict key listed in ``SERVICE_SECRET_FIELDS`` holds
+            a value that ``contains_masked_key`` reports as masked.
+    """
+    if isinstance(value, dict):
+        for field_name, child in value.items():
+            is_secret_field = field_name in SERVICE_SECRET_FIELDS
+            if is_secret_field and contains_masked_key(child):
+                raise ValueError(
+                    f"The {field_name} appears to be masked. Please provide the actual "
+                    "value, not the masked value."
+                )
+            _raise_if_masked_secret(child)
+        return
+
+    if isinstance(value, list):
+        for element in value:
+            _raise_if_masked_secret(element)
+
+
+def _mask_secret_fields(value):
+    """Mask every non-empty secret field inside nested dicts/lists, in place.
+
+    Dict entries whose key is in ``SERVICE_SECRET_FIELDS`` and whose value is
+    truthy are replaced by their masked form; all other values are walked
+    recursively. Non-container values are left alone.
+    """
+    if isinstance(value, dict):
+        # Snapshot the items because entries are reassigned while iterating.
+        for field_name, child in list(value.items()):
+            if field_name in SERVICE_SECRET_FIELDS and child:
+                value[field_name] = _mask_secret_value(child)
+                continue
+            _mask_secret_fields(child)
+        return
+
+    if isinstance(value, list):
+        for element in value:
+            _mask_secret_fields(element)
+
+
+def _mask_secret_value(value):
+    """Mask one secret, or each element when given a list of secrets."""
+    if not isinstance(value, list):
+        return mask_key(value)
+    return [mask_key(secret) for secret in value]
+
+
+def _has_model_services(configuration: EffectiveAIModelConfiguration) -> bool:
+    """Tell whether the configuration defines at least one model service.
+
+    Checks llm, tts, stt, embeddings and realtime; a service counts as
+    defined when it is not None.
+    """
+    services = (
+        configuration.llm,
+        configuration.tts,
+        configuration.stt,
+        configuration.embeddings,
+        configuration.realtime,
+    )
+    for service in services:
+        if service is not None:
+            return True
+    return False
+
+
+def _convert_any_dograh_legacy_configuration(
+    configuration: EffectiveAIModelConfiguration,
+    dograh_key: str,
+) -> OrganizationAIModelConfigurationV2:
+    """Build a "dograh" mode v2 configuration from a legacy configuration.
+
+    Voice and speed are read from the legacy tts service and language from
+    the legacy stt service. A speed that is missing, not convertible to
+    float, or outside [DOGRAH_SPEED_MIN, DOGRAH_SPEED_MAX] becomes 1.0;
+    a missing or falsy voice/language falls back to the Dograh default.
+    """
+    raw_speed = getattr(configuration.tts, "speed", 1.0)
+    try:
+        speed = float(raw_speed)
+    except (TypeError, ValueError):
+        speed = 1.0
+    # Chained comparison on purpose: it is False for NaN, which then resets to 1.0.
+    speed_in_range = DOGRAH_SPEED_MIN <= speed <= DOGRAH_SPEED_MAX
+    if not speed_in_range:
+        speed = 1.0
+
+    voice = getattr(configuration.tts, "voice", DOGRAH_DEFAULT_VOICE)
+    if not voice:
+        voice = DOGRAH_DEFAULT_VOICE
+
+    managed = DograhManagedAIModelConfiguration(
+        api_key=dograh_key,
+        voice=voice,
+        speed=speed,
+        language=getattr(configuration.stt, "language", DOGRAH_DEFAULT_LANGUAGE)
+        or DOGRAH_DEFAULT_LANGUAGE,
+    )
+    return OrganizationAIModelConfigurationV2(mode="dograh", dograh=managed)
+
+
+def _first_dograh_api_key(configuration: EffectiveAIModelConfiguration) -> str | None:
+    """Find the first usable API key among Dograh-provided services.
+
+    Services are inspected in the order llm, tts, stt, embeddings, realtime.
+    A Dograh service whose key cannot be extracted as a single key is
+    skipped. Returns None when no service yields a key.
+    """
+    candidates = (
+        configuration.llm,
+        configuration.tts,
+        configuration.stt,
+        configuration.embeddings,
+        configuration.realtime,
+    )
+    for service in candidates:
+        if service is None:
+            continue
+        if _provider(service) != ServiceProviders.DOGRAH:
+            continue
+        try:
+            api_key = _single_api_key(service)
+        except ValueError:
+            # Not exactly one key on this service; try the next one.
+            continue
+        return api_key
+    return None
+
+
+def _provider(service):
+    """Return ``service.provider``, or None when the attribute is missing."""
+    try:
+        return service.provider
+    except AttributeError:
+        return None
+
+
+def _single_api_key(service) -> str:
+    """Extract the one API key configured on a service.
+
+    Services exposing ``get_all_api_keys()`` must return exactly one key;
+    other services must have a truthy ``api_key`` attribute.
+
+    Raises:
+        ValueError: When the key count is not exactly one, or no key is set.
+    """
+    if not hasattr(service, "get_all_api_keys"):
+        fallback_key = getattr(service, "api_key", None)
+        if fallback_key:
+            return fallback_key
+        raise ValueError("Expected an API key")
+
+    all_keys = service.get_all_api_keys()
+    if len(all_keys) == 1:
+        return all_keys[0]
+    raise ValueError("Expected exactly one API key")
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -1,5 +1,6 @@
 from typing import Optional, TypedDict

+import httpx
 import openai
 from deepgram import DeepgramClient
 from groq import Groq
@ -8,11 +9,12 @@ from groq import Groq
 #     from pyneuphonic import Neuphonic
 # except ImportError:
 #     Neuphonic = None
-from api.schemas.user_configuration import (
-    UserConfiguration,
+from api.schemas.ai_model_configuration import (
+    EffectiveAIModelConfiguration,
 )
 from api.services.configuration.registry import ServiceConfig, ServiceProviders
 from api.services.mps_service_key_client import mps_service_key_client
+from api.utils.url_security import validate_user_configured_service_url

 AuthContext = TypedDict(
    "AuthContext",
@ -37,9 +39,11 @@ class UserConfigurationValidator:
            ServiceProviders.DEEPGRAM.value: self._check_deepgram_api_key,
            ServiceProviders.GROQ.value: self._check_groq_api_key,
            ServiceProviders.OPENROUTER.value: self._check_openrouter_api_key,
+            ServiceProviders.INWORLD.value: self._check_inworld_api_key,
            ServiceProviders.ELEVENLABS.value: self._validate_elevenlabs_api_key,
            ServiceProviders.GOOGLE.value: self._check_google_api_key,
            ServiceProviders.AZURE.value: self._check_azure_api_key,
+            ServiceProviders.AZURE_SPEECH.value: self._check_azure_speech_api_key,
            ServiceProviders.CARTESIA.value: self._check_cartesia_api_key,
            ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
            ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
@ -47,21 +51,24 @@ class UserConfigurationValidator:
            ServiceProviders.CAMB.value: self._check_camb_api_key,
            ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
            ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
+            ServiceProviders.HUGGINGFACE.value: self._check_huggingface_api_key,
            ServiceProviders.GOOGLE_VERTEX.value: self._check_google_vertex_llm_api_key,
            ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
            ServiceProviders.GROK_REALTIME.value: self._check_grok_realtime_api_key,
            ServiceProviders.ULTRAVOX_REALTIME.value: self._check_ultravox_realtime_api_key,
            ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
            ServiceProviders.GOOGLE_VERTEX_REALTIME.value: self._check_google_vertex_realtime_api_key,
+            ServiceProviders.AZURE_REALTIME.value: self._check_azure_realtime_api_key,
            ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
+            ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
        }

    async def validate(
        self,
-        configuration: UserConfiguration,
+        configuration: EffectiveAIModelConfiguration,
        organization_id: Optional[int] = None,
        created_by: Optional[str] = None,
    ) -> APIKeyStatusResponse:
@ -72,21 +79,21 @@ class UserConfigurationValidator:
        status_list = []

        status_list.extend(self._validate_service(configuration.llm, "llm"))
-        status_list.extend(self._validate_service(configuration.stt, "stt"))
-        status_list.extend(self._validate_service(configuration.tts, "tts"))
-        # Embeddings is optional - only validate if configured
-        status_list.extend(
-            self._validate_service(
-                configuration.embeddings, "embeddings", required=False
-            )
-        )
-        # Realtime is optional - only validate if is_realtime is enabled
        if configuration.is_realtime:
            status_list.extend(
                self._validate_service(
                    configuration.realtime, "realtime", required=True
                )
            )
+        else:
+            status_list.extend(self._validate_service(configuration.stt, "stt"))
+            status_list.extend(self._validate_service(configuration.tts, "tts"))
+        # Embeddings is optional - only validate if configured
+        status_list.extend(
+            self._validate_service(
+                configuration.embeddings, "embeddings", required=False
+            )
+        )

        if status_list:
            raise ValueError(status_list)
@ -107,6 +114,17 @@ class UserConfigurationValidator:

        provider = service_config.provider

+        for url_field in ("base_url", "endpoint"):
+            url = getattr(service_config, url_field, None)
+            if url:
+                try:
+                    validate_user_configured_service_url(
+                        url,
+                        field_name=url_field,
+                    )
+                except ValueError as e:
+                    return [{"model": service_name, "message": str(e)}]
+
        # Speaches doesn't require an API key
        if provider == ServiceProviders.SPEACHES.value:
            try:
@ -181,30 +199,92 @@ class UserConfigurationValidator:
        api_key = service_config.api_key

        try:
-            if not self._check_api_key(provider, api_key):
+            if not self._check_api_key(provider, api_key, service_config):
                return [
-                    {"model": service_name, "message": f"Invalid {provider} API key"}
+                    {
+                        "model": service_name,
+                        "message": (
+                            f"Invalid {provider} API key. Please verify your API key is "
+                            f"correct, has not expired, and has the required permissions."
+                        ),
+                    }
                ]
        except ValueError as e:
            return [{"model": service_name, "message": str(e)}]

        return []

-    def _check_api_key(self, provider: str, api_key: str) -> bool:
+    def _check_api_key(
+        self,
+        provider: str,
+        api_key: str,
+        service_config: Optional[ServiceConfig] = None,
+    ) -> bool:
         """Check if an API key for a provider is valid."""
         validator = self._validator_map.get(provider)
         if not validator:
             return False

+        # Only the OpenAI validator takes the service config (it reads an
+        # optional base_url from it); every other validator keeps the
+        # two-argument (provider, api_key) call.
+        if provider in (
+            ServiceProviders.OPENAI.value,
+            ServiceProviders.OPENAI_REALTIME.value,
+        ):
+            return validator(provider, api_key, service_config)
         return validator(provider, api_key)

-    def _check_openai_api_key(self, model: str, api_key: str) -> bool:
-        client = openai.OpenAI(api_key=api_key)
+    def _check_openai_api_key(
+        self, model: str, api_key: str, service_config: Optional[ServiceConfig] = None
+    ) -> bool:
+        """Validate an OpenAI (or OpenAI-compatible) key by listing models.
+
+        The client targets ``service_config.base_url`` when one is set and the
+        library default otherwise.
+
+        Returns:
+            True when ``client.models.list()`` succeeds.
+
+        Raises:
+            ValueError: With a user-facing message for every failure: rejected
+                key, connection failure, any other API error, or any other
+                exception. Messages mention ``base_url`` when one is configured.
+        """
+        client_kwargs: dict[str, str] = {"api_key": api_key}
+        base_url = getattr(service_config, "base_url", None) if service_config else None
+        if base_url:
+            client_kwargs["base_url"] = base_url
+        client = openai.OpenAI(**client_kwargs)
         try:
             client.models.list()
             return True
         except openai.AuthenticationError:
-            return False
+            # Substring test: any base_url containing "openai.com" gets the
+            # generic OpenAI message with the platform.openai.com link.
+            if base_url and "openai.com" not in base_url:
+                raise ValueError(
+                    f"Invalid OpenAI API key. The key was rejected by the API at {base_url}. "
+                    "Please check that your API key is correct and has not been revoked."
+                )
+            raise ValueError(
+                "Invalid OpenAI API key. The key was rejected by the OpenAI API. "
+                "Please check that your API key is correct and has not been revoked. "
+                "You can verify your keys at https://platform.openai.com/api-keys."
+            )
+        except openai.APIConnectionError:
+            if base_url:
+                raise ValueError(
+                    f"Could not connect to the OpenAI-compatible API at {base_url}. "
+                    "Please verify that the base_url is correct and reachable, and try again."
+                )
+            raise ValueError(
+                "Could not connect to the OpenAI API. Please check your network connection "
+                "and try again."
+            )
+        # Must stay after the more specific handlers above so they win.
+        except openai.APIError:
+            if base_url:
+                raise ValueError(
+                    f"The OpenAI-compatible API at {base_url} returned an error while "
+                    "validating the API key. Please verify that the base_url is correct, "
+                    "the service is available, and the API key is valid."
+                )
+            raise ValueError(
+                "The OpenAI API returned an error while validating the API key. "
+                "Please try again later."
+            )
+        except Exception:
+            if base_url:
+                raise ValueError(
+                    f"Failed to validate the OpenAI API key using the API at {base_url}. "
+                    "Please verify that the base_url is correct and reachable, and that the "
+                    "API key is valid."
+                )
+            raise ValueError(
+                "Failed to validate the OpenAI API key. Please try again later."
+            )

    def _check_deepgram_api_key(self, model: str, api_key: str) -> bool:
        try:
@ -212,7 +292,11 @@ class UserConfigurationValidator:
            deepgram.manage.v1.projects.list()
            return True
        except Exception:
-            return False
+            raise ValueError(
+                "Invalid Deepgram API key. The key was rejected by the Deepgram API. "
+                "Please check that your API key is correct and active. "
+                "You can verify your keys at https://console.deepgram.com/."
+            )

    def _check_groq_api_key(self, model: str, api_key: str) -> bool:
        client = Groq(api_key=api_key)
@ -220,7 +304,11 @@ class UserConfigurationValidator:
            client.models.list()
            return True
        except Exception:
-            return False
+            raise ValueError(
+                "Invalid Groq API key. The key was rejected by the Groq API. "
+                "Please check that your API key is correct and active. "
+                "You can verify your keys at https://console.groq.com/keys."
+            )

    def _validate_elevenlabs_api_key(self, model: str, api_key: str) -> bool:
        return True
@ -231,6 +319,12 @@ class UserConfigurationValidator:
    def _check_azure_api_key(self, model: str, api_key: str) -> bool:
        return True

+    def _check_azure_speech_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any Azure Speech key; no validation is performed here."""
+        return True
+
+    def _check_azure_realtime_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any Azure Realtime key; no validation is performed here."""
+        return True
+
    def _check_cartesia_api_key(self, model: str, api_key: str) -> bool:
        return True

@ -253,6 +347,32 @@ class UserConfigurationValidator:
    def _check_openrouter_api_key(self, model: str, api_key: str) -> bool:
        return True

+    def _check_inworld_api_key(self, model: str, api_key: str) -> bool:
+        """Validate an Inworld API key by requesting one voice from the Inworld API.
+
+        Returns:
+            True when the request completes and ``raise_for_status()`` does not
+            raise.
+
+        Raises:
+            ValueError: With a user-facing message when the key is rejected
+                (HTTP 401/403), when the API answers with another error status,
+                or when the request itself fails.
+        """
+        try:
+            httpx.get(
+                "https://api.inworld.ai/voices/v1/voices",
+                headers={"Authorization": f"Basic {api_key}"},
+                params={"pageSize": 1},
+                timeout=10.0,
+            ).raise_for_status()
+        except httpx.HTTPStatusError as status_error:
+            if status_error.response.status_code not in (401, 403):
+                raise ValueError(
+                    "The Inworld API returned an error while validating the API key. "
+                    "Please try again later."
+                ) from status_error
+            raise ValueError(
+                "Invalid Inworld API key. The key was rejected by the Inworld API. "
+                "Please verify that your API key is correct, active, and has voice read access."
+            ) from status_error
+        except httpx.RequestError as request_error:
+            raise ValueError(
+                "Could not connect to the Inworld API. Please check your network connection "
+                "and try again."
+            ) from request_error
+        return True
+
    def _check_grok_realtime_api_key(self, model: str, api_key: str) -> bool:
        return True

@ -270,6 +390,14 @@ class UserConfigurationValidator:
            raise ValueError("base_url is required for Speaches services")
        return True

+    def _check_huggingface_api_key(self, model: str, api_key: str) -> bool:
+        """Check that the Hugging Face token(s) carry the ``hf_`` prefix.
+
+        This is a format-only check; no request is sent to Hugging Face.
+
+        Args:
+            model: Provider identifier passed in by ``_check_api_key`` (unused).
+            api_key: A single token, or a list of tokens.
+
+        Returns:
+            True when every token is a string starting with ``hf_``.
+
+        Raises:
+            ValueError: If a token is missing, is not a string, or lacks the
+                ``hf_`` prefix.
+        """
+        # The caller only converts ValueError into a validation message, so a
+        # None or list-valued key must not escape as AttributeError.
+        tokens = api_key if isinstance(api_key, list) else [api_key]
+        well_formed = bool(tokens) and all(
+            isinstance(token, str) and token.startswith("hf_") for token in tokens
+        )
+        if not well_formed:
+            raise ValueError(
+                "Invalid Hugging Face API token format. Use a token that starts with "
+                "'hf_' and has Inference Providers permission."
+            )
+        return True
+
    def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
        if not getattr(service_config, "project_id", None):
            raise ValueError("project_id is required for Google Vertex Realtime")
@ -299,6 +427,7 @@ class UserConfigurationValidator:
        return True

    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
-        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
-        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        return True
+
+    def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any Smallest key; no validation is performed here."""
         return True
--- a/api/services/configuration/masking.py
+++ b/api/services/configuration/masking.py
@ -9,9 +9,10 @@ The rules are simple:
   in storage.
 """

+import copy
 from typing import Any, Dict, Optional

-from api.schemas.user_configuration import UserConfiguration
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
 from api.services.configuration.registry import ServiceConfig
 from api.services.integrations import get_node_secret_fields

@ -19,6 +20,7 @@ VISIBLE_CHARS = 4  # number of trailing characters to reveal
 MASK_CHAR = "*"
 MASK_MARKER = "***"  # substring that indicates a masked key
 SERVICE_SECRET_FIELDS = ("api_key", "credentials", "aws_access_key", "aws_secret_key")
+MODEL_OVERRIDE_FIELDS = ("llm", "tts", "stt", "realtime")


 def contains_masked_key(value: str | list[str] | None) -> bool:
@ -29,7 +31,7 @@ def contains_masked_key(value: str | list[str] | None) -> bool:
    return any(MASK_MARKER in k for k in keys)


-def check_for_masked_keys(config: "UserConfiguration") -> None:
+def check_for_masked_keys(config: "EffectiveAIModelConfiguration") -> None:
    """Raise ValueError if any service in *config* still has a masked secret."""
    for field in ("llm", "tts", "stt", "embeddings", "realtime"):
        service = getattr(config, field, None)
@ -67,6 +69,12 @@ def mask_key(real_key: str, visible: int = VISIBLE_CHARS) -> str:
    return f"{masked_part}{real_key[-visible:]}"


+def _mask_secret_value(value: str | list[str]) -> str | list[str]:
+    """Mask one secret, or every entry of a list of secrets, with ``mask_key``."""
+    if not isinstance(value, list):
+        return mask_key(value)
+    return list(map(mask_key, value))
+
+
 def is_mask_of(masked: str, real_key: str) -> bool:
    """Return *True* if *masked* equals the mask of *real_key* under the current rules."""
    return mask_key(real_key) == masked
@ -103,7 +111,7 @@ def resolve_masked_api_keys(


 # ---------------------------------------------------------------------------
-# High-level helpers for UserConfiguration objects
+# High-level helpers for EffectiveAIModelConfiguration objects
 # ---------------------------------------------------------------------------


@ -117,14 +125,11 @@ def _mask_service(service_cfg: Optional[ServiceConfig]) -> Optional[Dict[str, An
        if secret_field not in data or not data[secret_field]:
            continue
        raw = data[secret_field]
-        if isinstance(raw, list):
-            data[secret_field] = [mask_key(k) for k in raw]
-        else:
-            data[secret_field] = mask_key(raw)
+        data[secret_field] = _mask_secret_value(raw)
    return data


-def mask_user_config(config: UserConfiguration) -> Dict[str, Any]:
+def mask_user_config(config: EffectiveAIModelConfiguration) -> Dict[str, Any]:
    """Return a JSON-serialisable dict of *config* with every api_key masked."""

    return {
@ -139,6 +144,42 @@ def mask_user_config(config: UserConfiguration) -> Dict[str, Any]:
    }


+def mask_workflow_configurations(config: Optional[Dict]) -> Optional[Dict]:
+    """Return a deep copy of *config* with workflow-level override secrets masked.
+
+    Secrets are masked in each ``model_overrides`` section named in
+    ``MODEL_OVERRIDE_FIELDS`` and at any depth inside
+    ``model_configuration_v2_override``. A falsy *config* is returned as-is;
+    the input itself is not modified.
+    """
+    if not config:
+        return config
+
+    result = copy.deepcopy(config)
+
+    overrides = result.get("model_overrides")
+    if isinstance(overrides, dict):
+        sections = (overrides.get(name) for name in MODEL_OVERRIDE_FIELDS)
+        for section in sections:
+            if isinstance(section, dict):
+                for field in SERVICE_SECRET_FIELDS:
+                    secret = section.get(field)
+                    if secret:
+                        section[field] = _mask_secret_value(secret)
+
+    v2_section = result.get("model_configuration_v2_override")
+    if isinstance(v2_section, dict):
+        _mask_nested_service_secrets(v2_section)
+
+    return result
+
+
+def _mask_nested_service_secrets(value):
+    """Mask, in place, every truthy secret field found at any depth of *value*.
+
+    Dicts and lists are walked recursively; any other value is left alone. A
+    key listed in ``SERVICE_SECRET_FIELDS`` with a truthy value is replaced by
+    its masked form and not descended into.
+    """
+    if isinstance(value, list):
+        for element in value:
+            _mask_nested_service_secrets(element)
+        return
+    if not isinstance(value, dict):
+        return
+    # Snapshot the items so reassigning entries does not disturb iteration.
+    for name, child in tuple(value.items()):
+        if name in SERVICE_SECRET_FIELDS and child:
+            value[name] = _mask_secret_value(child)
+            continue
+        _mask_nested_service_secrets(child)
+
+
 # ---------------------------------------------------------------------------
 # Workflow definition helpers – mask / merge node API keys
 # ---------------------------------------------------------------------------
--- a/api/services/configuration/merge.py
+++ b/api/services/configuration/merge.py
@ -4,21 +4,71 @@ from __future__ import annotations
 stored, while honouring masked API keys.
 """

+import copy
 from typing import Dict

-from api.schemas.user_configuration import UserConfiguration
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
 from api.services.configuration.masking import (
+    MODEL_OVERRIDE_FIELDS,
    SERVICE_SECRET_FIELDS,
+    contains_masked_key,
    resolve_masked_api_keys,
 )

 SERVICE_FIELDS = ("llm", "tts", "stt", "embeddings", "realtime")


+def _same_provider(incoming_cfg: dict, existing_cfg: dict) -> bool:
+    """Return False only when both configs name a provider and the names differ.
+
+    A provider that is missing or ``None`` on either side counts as "same".
+    """
+    stored_provider = existing_cfg.get("provider")
+    submitted_provider = incoming_cfg.get("provider")
+    if stored_provider is None or submitted_provider is None:
+        return True
+    return not (submitted_provider != stored_provider)
+
+
+def _merge_service_secret_fields(
+    incoming_cfg: dict,
+    existing_cfg: dict,
+    *,
+    preserve_missing: bool,
+    masked_value_preserves_full_secret: bool = False,
+) -> dict:
+    """Put stored real secrets back where the incoming config carries masks.
+
+    *incoming_cfg* is updated in place and returned. Nothing is changed when
+    the two configs name different providers.
+
+    For each secret field present in *existing_cfg*:
+
+    * a masked incoming value is replaced by the whole stored secret when
+      ``masked_value_preserves_full_secret`` is true, otherwise by the result
+      of ``resolve_masked_api_keys``;
+    * an unmasked incoming value is kept untouched;
+    * a missing (``None``) incoming value is filled from the stored config
+      only when ``preserve_missing`` is true. User config updates need that;
+      workflow model overrides leave it blank so later enrichment can copy
+      from the current global config.
+    """
+    if not _same_provider(incoming_cfg, existing_cfg):
+        return incoming_cfg
+
+    for name in SERVICE_SECRET_FIELDS:
+        if name not in existing_cfg:
+            continue
+
+        submitted = incoming_cfg.get(name)
+        stored = existing_cfg[name]
+
+        if submitted is None:
+            if preserve_missing:
+                incoming_cfg[name] = stored
+            continue
+
+        if not contains_masked_key(submitted):
+            continue
+
+        if masked_value_preserves_full_secret:
+            incoming_cfg[name] = stored
+        else:
+            incoming_cfg[name] = resolve_masked_api_keys(submitted, stored)
+
+    return incoming_cfg
+
+
 def merge_user_configurations(
-    existing: UserConfiguration, incoming_partial: Dict[str, dict]
-) -> UserConfiguration:
-    """Merge *incoming_partial* onto *existing* and return a new UserConfiguration.
+    existing: EffectiveAIModelConfiguration, incoming_partial: Dict[str, dict]
+) -> EffectiveAIModelConfiguration:
+    """Merge *incoming_partial* onto *existing* and return a new EffectiveAIModelConfiguration.

    *incoming_partial* is the body of the PUT request (already `model_dump()`ed or
    extracted via Pydantic `model_dump`).
@ -41,23 +91,12 @@ def merge_user_configurations(
            return  # nothing to do

        old_cfg = merged.get(service_name, {})
-
-        provider_changed = (
-            old_cfg.get("provider") is not None
-            and incoming_cfg.get("provider") is not None
-            and incoming_cfg.get("provider") != old_cfg.get("provider")
-        )
-
-        if not provider_changed:
-            for secret_field in SERVICE_SECRET_FIELDS:
-                incoming_secret = incoming_cfg.get(secret_field)
-                if incoming_secret is not None:
-                    if old_cfg and secret_field in old_cfg:
-                        incoming_cfg[secret_field] = resolve_masked_api_keys(
-                            incoming_secret, old_cfg[secret_field]
-                        )
-                elif secret_field in old_cfg:
-                    incoming_cfg[secret_field] = old_cfg[secret_field]
+        if old_cfg:
+            incoming_cfg = _merge_service_secret_fields(
+                incoming_cfg,
+                old_cfg,
+                preserve_missing=True,
+            )

        merged[service_name] = incoming_cfg

@ -74,4 +113,47 @@ def merge_user_configurations(
    if "timezone" in incoming_partial:
        merged["timezone"] = incoming_partial["timezone"]

-    return UserConfiguration.model_validate(merged)
+    return EffectiveAIModelConfiguration.model_validate(merged)
+
+
+def merge_workflow_configuration_secrets(
+    incoming_config: dict | None,
+    existing_config: dict | None,
+) -> dict | None:
+    """Swap masked workflow override secrets for the persisted real values.
+
+    Workflow model overrides intentionally persist real keys so a workflow
+    keeps running after the global provider changes. API responses mask those
+    keys, so a save request must have its masked placeholders merged back to
+    the stored values.
+
+    Unlike user config updates, a secret missing from an incoming override is
+    not copied from the existing workflow config: missing means "copy from
+    current global" during the later enrichment step.
+
+    Returns *incoming_config* unchanged when either argument is falsy;
+    otherwise returns a deep copy with the ``model_overrides`` sections merged.
+    """
+    # NOTE(review): only "model_overrides" is merged here, while
+    # mask_workflow_configurations also masks "model_configuration_v2_override".
+    # Confirm that masked v2 override secrets are restored elsewhere.
+    if not (incoming_config and existing_config):
+        return incoming_config
+
+    result = copy.deepcopy(incoming_config)
+    new_overrides = result.get("model_overrides")
+    old_overrides = existing_config.get("model_overrides")
+
+    if isinstance(new_overrides, dict) and isinstance(old_overrides, dict):
+        for section in MODEL_OVERRIDE_FIELDS:
+            new_section = new_overrides.get(section)
+            old_section = old_overrides.get(section)
+            if isinstance(new_section, dict) and isinstance(old_section, dict):
+                new_overrides[section] = _merge_service_secret_fields(
+                    new_section,
+                    old_section,
+                    preserve_missing=False,
+                    masked_value_preserves_full_secret=True,
+                )
+
+    return result
--- a/api/services/configuration/options/init.py
+++ b/api/services/configuration/options/init.py
@ -1,4 +1,27 @@
-from .deepgram import DEEPGRAM_LANGUAGES, DEEPGRAM_STT_MODELS
+from .azure import (
+    AZURE_EMBEDDING_MODELS,
+    AZURE_MODELS,
+    AZURE_REALTIME_API_VERSIONS,
+    AZURE_REALTIME_MODELS,
+    AZURE_REALTIME_VOICES,
+    AZURE_SPEECH_REGIONS,
+    AZURE_SPEECH_STT_LANGUAGES,
+    AZURE_SPEECH_TTS_LANGUAGES,
+    AZURE_SPEECH_TTS_VOICES,
+)
+from .cartesia import (
+    CARTESIA_INK_2_STT_LANGUAGES,
+    CARTESIA_INK_WHISPER_STT_LANGUAGES,
+    CARTESIA_STT_LANGUAGES,
+    CARTESIA_STT_MODELS,
+)
+from .deepgram import (
+    DEEPGRAM_FLUX_MODELS,
+    DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
+    DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES,
+    DEEPGRAM_LANGUAGES,
+    DEEPGRAM_STT_MODELS,
+)
 from .gladia import GLADIA_STT_LANGUAGES, GLADIA_STT_MODELS
 from .google import (
    GOOGLE_MODELS,
@ -16,14 +39,39 @@ from .google import (
 )
 from .sarvam import (
    SARVAM_LANGUAGES,
+    SARVAM_LLM_MODELS,
+    SARVAM_STT_LANGUAGES_V3,
+    SARVAM_STT_LANGUAGES_V25,
    SARVAM_STT_MODELS,
    SARVAM_TTS_MODELS,
    SARVAM_V2_VOICES,
    SARVAM_V3_VOICES,
 )
+from .smallest import (
+    SMALLEST_TTS_LANGUAGES,
+    SMALLEST_TTS_MODELS,
+    SMALLEST_TTS_PRO_VOICES,
+    SMALLEST_TTS_VOICES,
+)
 from .speechmatics import SPEECHMATICS_STT_LANGUAGES

 __all__ = [
+    "AZURE_EMBEDDING_MODELS",
+    "AZURE_MODELS",
+    "AZURE_REALTIME_API_VERSIONS",
+    "AZURE_REALTIME_MODELS",
+    "AZURE_REALTIME_VOICES",
+    "AZURE_SPEECH_REGIONS",
+    "AZURE_SPEECH_STT_LANGUAGES",
+    "AZURE_SPEECH_TTS_LANGUAGES",
+    "AZURE_SPEECH_TTS_VOICES",
+    "CARTESIA_INK_2_STT_LANGUAGES",
+    "CARTESIA_INK_WHISPER_STT_LANGUAGES",
+    "CARTESIA_STT_LANGUAGES",
+    "CARTESIA_STT_MODELS",
+    "DEEPGRAM_FLUX_MODELS",
+    "DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES",
+    "DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS",
    "DEEPGRAM_LANGUAGES",
    "DEEPGRAM_STT_MODELS",
    "GLADIA_STT_LANGUAGES",
@ -41,9 +89,16 @@ __all__ = [
    "GOOGLE_VERTEX_REALTIME_MODELS",
    "GOOGLE_VERTEX_REALTIME_VOICES",
    "SARVAM_LANGUAGES",
+    "SARVAM_LLM_MODELS",
+    "SARVAM_STT_LANGUAGES_V25",
+    "SARVAM_STT_LANGUAGES_V3",
    "SARVAM_STT_MODELS",
    "SARVAM_TTS_MODELS",
    "SARVAM_V2_VOICES",
    "SARVAM_V3_VOICES",
+    "SMALLEST_TTS_LANGUAGES",
+    "SMALLEST_TTS_MODELS",
+    "SMALLEST_TTS_PRO_VOICES",
+    "SMALLEST_TTS_VOICES",
    "SPEECHMATICS_STT_LANGUAGES",
 ]
--- a/api/services/configuration/options/azure.py
+++ b/api/services/configuration/options/azure.py
@ -0,0 +1,125 @@
+AZURE_MODELS = ["gpt-4.1-mini"]
+
+AZURE_REALTIME_MODELS = ["gpt-4o-realtime-preview"]
+AZURE_REALTIME_VOICES = [
+    "alloy",
+    "ash",
+    "ballad",
+    "coral",
+    "echo",
+    "sage",
+    "shimmer",
+    "verse",
+]
+AZURE_REALTIME_API_VERSIONS = [
+    "2025-04-01-preview",
+    "2024-10-01-preview",
+    "2024-12-17",
+]
+
+AZURE_SPEECH_REGIONS = [
+    "eastus",
+    "eastus2",
+    "westus",
+    "westus2",
+    "westus3",
+    "centralus",
+    "northcentralus",
+    "southcentralus",
+    "westcentralus",
+    "westeurope",
+    "northeurope",
+    "uksouth",
+    "ukwest",
+    "francecentral",
+    "switzerlandnorth",
+    "germanywestcentral",
+    "norwayeast",
+    "australiaeast",
+    "eastasia",
+    "southeastasia",
+    "japaneast",
+    "japanwest",
+    "koreacentral",
+    "centralindia",
+    "southindia",
+    "brazilsouth",
+]
+
+AZURE_SPEECH_TTS_LANGUAGES = [
+    "en-US",
+    "en-GB",
+    "en-AU",
+    "en-CA",
+    "en-IN",
+    "es-ES",
+    "es-MX",
+    "fr-FR",
+    "fr-CA",
+    "de-DE",
+    "it-IT",
+    "ja-JP",
+    "ko-KR",
+    "zh-CN",
+    "zh-HK",
+    "zh-TW",
+    "pt-BR",
+    "pt-PT",
+    "ru-RU",
+    "ar-SA",
+    "nl-NL",
+    "pl-PL",
+    "sv-SE",
+    "hi-IN",
+]
+
+AZURE_SPEECH_TTS_VOICES = [
+    "en-US-AriaNeural",
+    "en-US-GuyNeural",
+    "en-US-JennyNeural",
+    "en-US-DavisNeural",
+    "en-US-AmberNeural",
+    "en-US-AnaNeural",
+    "en-US-AshleyNeural",
+    "en-US-BrandonNeural",
+    "en-US-ChristopherNeural",
+    "en-US-ElizabethNeural",
+    "en-US-EricNeural",
+    "en-US-JacobNeural",
+    "en-US-MichelleNeural",
+    "en-US-MonicaNeural",
+    "en-US-NancyNeural",
+    "en-US-RogerNeural",
+    "en-US-SaraNeural",
+    "en-US-SteffanNeural",
+    "en-US-TonyNeural",
+]
+
+AZURE_SPEECH_STT_LANGUAGES = [
+    "en-US",
+    "en-GB",
+    "en-AU",
+    "en-CA",
+    "en-IN",
+    "es-ES",
+    "es-MX",
+    "fr-FR",
+    "fr-CA",
+    "de-DE",
+    "it-IT",
+    "ja-JP",
+    "ko-KR",
+    "zh-CN",
+    "pt-BR",
+    "pt-PT",
+    "ru-RU",
+    "ar-SA",
+    "nl-NL",
+    "pl-PL",
+    "hi-IN",
+]
+
+AZURE_EMBEDDING_MODELS = [
+    "text-embedding-3-small",
+    "text-embedding-ada-002",
+]
--- a/api/services/configuration/options/cartesia.py
+++ b/api/services/configuration/options/cartesia.py
@ -0,0 +1,105 @@
+CARTESIA_STT_MODELS = ["ink-2", "ink-whisper"]
+CARTESIA_INK_2_STT_LANGUAGES = ("en",)
+CARTESIA_INK_WHISPER_STT_LANGUAGES = (
+    "en",
+    "zh",
+    "de",
+    "es",
+    "ru",
+    "ko",
+    "fr",
+    "ja",
+    "pt",
+    "tr",
+    "pl",
+    "ca",
+    "nl",
+    "ar",
+    "sv",
+    "it",
+    "id",
+    "hi",
+    "fi",
+    "vi",
+    "he",
+    "uk",
+    "el",
+    "ms",
+    "cs",
+    "ro",
+    "da",
+    "hu",
+    "ta",
+    "no",
+    "th",
+    "ur",
+    "hr",
+    "bg",
+    "lt",
+    "la",
+    "mi",
+    "ml",
+    "cy",
+    "sk",
+    "te",
+    "fa",
+    "lv",
+    "bn",
+    "sr",
+    "az",
+    "sl",
+    "kn",
+    "et",
+    "mk",
+    "br",
+    "eu",
+    "is",
+    "hy",
+    "ne",
+    "mn",
+    "bs",
+    "kk",
+    "sq",
+    "sw",
+    "gl",
+    "mr",
+    "pa",
+    "si",
+    "km",
+    "sn",
+    "yo",
+    "so",
+    "af",
+    "oc",
+    "ka",
+    "be",
+    "tg",
+    "sd",
+    "gu",
+    "am",
+    "yi",
+    "lo",
+    "uz",
+    "fo",
+    "ht",
+    "ps",
+    "tk",
+    "nn",
+    "mt",
+    "sa",
+    "lb",
+    "my",
+    "bo",
+    "tl",
+    "mg",
+    "as",
+    "tt",
+    "haw",
+    "ln",
+    "ha",
+    "ba",
+    "jw",
+    "su",
+    "yue",
+)
+CARTESIA_STT_LANGUAGES = CARTESIA_INK_WHISPER_STT_LANGUAGES
--- a/api/services/configuration/options/deepgram.py
+++ b/api/services/configuration/options/deepgram.py
@ -1,4 +1,21 @@
-DEEPGRAM_STT_MODELS = ("nova-3-general", "flux-general-en", "flux-general-multi")
+DEEPGRAM_FLUX_MODELS = ("flux-general-en", "flux-general-multi")
+DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES = (
+    "de",
+    "en",
+    "es",
+    "fr",
+    "hi",
+    "it",
+    "ja",
+    "nl",
+    "pt",
+    "ru",
+)
+DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS = (
+    "multi",
+    *DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES,
+)
+DEEPGRAM_STT_MODELS = ("nova-3-general", *DEEPGRAM_FLUX_MODELS)
 DEEPGRAM_LANGUAGES = (
    "multi",
    "ar",
--- a/api/services/configuration/options/google.py
+++ b/api/services/configuration/options/google.py
@ -1,6 +1,4 @@
 GOOGLE_MODELS = (
-    "gemini-2.0-flash",
-    "gemini-2.0-flash-lite",
    "gemini-2.5-flash",
    "gemini-2.5-flash-lite",
    "gemini-3.5-flash",
--- a/api/services/configuration/options/sarvam.py
+++ b/api/services/configuration/options/sarvam.py
@ -63,4 +63,38 @@ SARVAM_LANGUAGES = (
    "te-IN",
    "as-IN",
 )
-SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v2")
+SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v3")
+# saarika:v2.5 language codes (unknown = auto-detect)
+SARVAM_STT_LANGUAGES_V25 = (
+    "unknown",
+    "hi-IN",
+    "bn-IN",
+    "gu-IN",
+    "kn-IN",
+    "ml-IN",
+    "mr-IN",
+    "od-IN",
+    "pa-IN",
+    "ta-IN",
+    "te-IN",
+    "en-IN",
+)
+# saaras:v3 adds these regional languages on top of the v2.5 set. Full list: https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
+SARVAM_STT_LANGUAGES_V3 = SARVAM_STT_LANGUAGES_V25 + (
+    "as-IN",
+    "ur-IN",
+    "ne-IN",
+    "kok-IN",
+    "ks-IN",
+    "sd-IN",
+    "sa-IN",
+    "sat-IN",
+    "mni-IN",
+    "brx-IN",
+    "mai-IN",
+    "doi-IN",
+)
+SARVAM_LLM_MODELS = (
+    "sarvam-30b",
+    "sarvam-105b",
+)
--- a/api/services/configuration/options/smallest.py
+++ b/api/services/configuration/options/smallest.py
@ -0,0 +1,45 @@
+SMALLEST_TTS_MODELS = ("lightning_v3.1", "lightning_v3.1_pro")  # standard model id, then the pro model id
+SMALLEST_TTS_VOICES = (  # voice ids offered for the standard lightning_v3.1 model
+    "sophia",
+    "avery",
+    "liam",
+    "lucas",
+    "olivia",
+    "ryan",
+    "freya",
+    "william",
+    "devansh",
+    "arjun",
+    "niharika",
+    "maya",
+    "dhruv",
+    "mia",
+    "maithili",
+)
+# Premium voices for lightning_v3.1_pro (American, British, and Indian accents; English and Hindi only).
+SMALLEST_TTS_PRO_VOICES = (
+    "meher",
+    "rhea",
+    "aviraj",
+    "cressida",
+    "willow",
+    "maverick",
+)
+SMALLEST_TTS_LANGUAGES = (  # two-letter language codes suggested for TTS synthesis
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "ar",
+    "bn",
+    "gu",
+    "he",
+    "kn",
+    "mr",
+    "ta",
+)
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -5,6 +5,21 @@ from typing import Annotated, Dict, Literal, Type, TypeVar, Union
 from pydantic import BaseModel, ConfigDict, Field, computed_field, field_validator

 from api.services.configuration.options import (
+    AZURE_EMBEDDING_MODELS,
+    AZURE_MODELS,
+    AZURE_REALTIME_API_VERSIONS,
+    AZURE_REALTIME_MODELS,
+    AZURE_REALTIME_VOICES,
+    AZURE_SPEECH_REGIONS,
+    AZURE_SPEECH_STT_LANGUAGES,
+    AZURE_SPEECH_TTS_LANGUAGES,
+    AZURE_SPEECH_TTS_VOICES,
+    CARTESIA_INK_2_STT_LANGUAGES,
+    CARTESIA_INK_WHISPER_STT_LANGUAGES,
+    CARTESIA_STT_LANGUAGES,
+    CARTESIA_STT_MODELS,
+    DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
+    DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES,
    DEEPGRAM_LANGUAGES,
    DEEPGRAM_STT_MODELS,
    GLADIA_STT_LANGUAGES,
@ -22,10 +37,17 @@ from api.services.configuration.options import (
    GOOGLE_VERTEX_REALTIME_MODELS,
    GOOGLE_VERTEX_REALTIME_VOICES,
    SARVAM_LANGUAGES,
+    SARVAM_LLM_MODELS,
+    SARVAM_STT_LANGUAGES_V3,
+    SARVAM_STT_LANGUAGES_V25,
    SARVAM_STT_MODELS,
    SARVAM_TTS_MODELS,
    SARVAM_V2_VOICES,
    SARVAM_V3_VOICES,
+    SMALLEST_TTS_LANGUAGES,
+    SMALLEST_TTS_MODELS,
+    SMALLEST_TTS_PRO_VOICES,
+    SMALLEST_TTS_VOICES,
    SPEECHMATICS_STT_LANGUAGES,
 )
 from api.services.configuration.options.google import GOOGLE_VERTEX_MODELS
@ -44,17 +66,20 @@ class ServiceProviders(str, Enum):
    DEEPGRAM = "deepgram"
    GROQ = "groq"
    OPENROUTER = "openrouter"
+    INWORLD = "inworld"
    CARTESIA = "cartesia"
    # NEUPHONIC = "neuphonic"
    ELEVENLABS = "elevenlabs"
    GOOGLE = "google"
    AZURE = "azure"
+    AZURE_SPEECH = "azure_speech"
    DOGRAH = "dograh"
    SARVAM = "sarvam"
    SPEECHMATICS = "speechmatics"
    CAMB = "camb"
    AWS_BEDROCK = "aws_bedrock"
    SPEACHES = "speaches"
+    HUGGINGFACE = "huggingface"
    ASSEMBLYAI = "assemblyai"
    GLADIA = "gladia"
    RIME = "rime"
@ -65,6 +90,8 @@ class ServiceProviders(str, Enum):
    ULTRAVOX_REALTIME = "ultravox_realtime"
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
+    AZURE_REALTIME = "azure_realtime"
+    SMALLEST = "smallest"


 class BaseServiceConfiguration(BaseModel):
@ -73,12 +100,15 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.DEEPGRAM,
        ServiceProviders.GROQ,
        ServiceProviders.OPENROUTER,
+        ServiceProviders.INWORLD,
        ServiceProviders.ELEVENLABS,
        ServiceProviders.GOOGLE,
        ServiceProviders.AZURE,
+        ServiceProviders.AZURE_SPEECH,
        ServiceProviders.DOGRAH,
        ServiceProviders.AWS_BEDROCK,
        ServiceProviders.SPEACHES,
+        ServiceProviders.HUGGINGFACE,
        ServiceProviders.ASSEMBLYAI,
        ServiceProviders.GLADIA,
        ServiceProviders.RIME,
@ -89,7 +119,9 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.ULTRAVOX_REALTIME,
        ServiceProviders.GOOGLE_REALTIME,
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
-        # ServiceProviders.SARVAM,
+        ServiceProviders.AZURE_REALTIME,
+        ServiceProviders.SARVAM,
+        ServiceProviders.SMALLEST,
    ]
    api_key: str | list[str]

@ -224,6 +256,14 @@ GOOGLE_VERTEX_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config(
 DEEPGRAM_PROVIDER_MODEL_CONFIG = provider_model_config("Deepgram")
 ELEVENLABS_PROVIDER_MODEL_CONFIG = provider_model_config("ElevenLabs")
 CARTESIA_PROVIDER_MODEL_CONFIG = provider_model_config("Cartesia")
+INWORLD_PROVIDER_MODEL_CONFIG = provider_model_config(  # model_config assigned to InworldTTSConfiguration
+    "Inworld",
+    description=(
+        "Inworld AI streaming text-to-speech with built-in and cloned voices. "
+        "Defaults to the Ashley system voice on inworld-tts-2."
+    ),
+    provider_docs_url="https://docs.inworld.ai/tts/tts",
+)
 SARVAM_PROVIDER_MODEL_CONFIG = provider_model_config("Sarvam")
 CAMB_PROVIDER_MODEL_CONFIG = provider_model_config("Camb.ai")
 RIME_PROVIDER_MODEL_CONFIG = provider_model_config("Rime")
@ -239,6 +279,21 @@ SPEACHES_PROVIDER_MODEL_CONFIG = provider_model_config(
    ),
    provider_docs_url="https://github.com/speaches-ai/speaches",
 )
+HUGGINGFACE_PROVIDER_MODEL_CONFIG = provider_model_config(  # shared by the Hugging Face LLM and STT classes
+    "Hugging Face",
+    description="Hosted Hugging Face Inference Providers API for usage-based inference.",
+    provider_docs_url="https://huggingface.co/docs/inference-providers/en/index",
+)
+AZURE_SPEECH_PROVIDER_MODEL_CONFIG = provider_model_config(  # shared by the Azure Speech TTS and STT classes
+    "Azure Speech Services",
+    description="Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.",
+    provider_docs_url="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/",
+)
+AZURE_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config(  # used by AzureRealtimeLLMConfiguration
+    "Azure OpenAI Realtime",
+    description="Azure OpenAI Realtime API — low-latency speech-to-speech conversations.",
+    provider_docs_url="https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/realtime-audio-quickstart",
+)

 OPENAI_MODELS = [
    "gpt-4.1",
@ -265,11 +320,9 @@ OPENROUTER_MODELS = [
    "openai/gpt-4.1-mini",
    "anthropic/claude-sonnet-4",
    "google/gemini-2.5-flash",
-    "google/gemini-2.0-flash",
    "meta-llama/llama-3.3-70b-instruct",
    "deepseek/deepseek-chat-v3-0324",
 ]
-AZURE_MODELS = ["gpt-4.1-mini"]
 DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"]
 AWS_BEDROCK_MODELS = [
    "us.amazon.nova-pro-v1:0",
@ -290,6 +343,10 @@ class OpenAILLMService(BaseLLMConfiguration):
        description="OpenAI chat model to use.",
        json_schema_extra={"examples": OPENAI_MODELS, "allow_custom_input": True},
    )
+    base_url: str = Field(
+        default="https://api.openai.com/v1",
+        description="Override only if using an OpenAI-compatible API (e.g. local LLM, proxy).",
+    )


@register_llm
@ -297,7 +354,7 @@ class GoogleLLMService(BaseLLMConfiguration):
    model_config = GOOGLE_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
    model: str = Field(
-        default="gemini-2.0-flash",
+        default="gemini-2.5-flash",
        description="Gemini model on Google AI Studio (not Vertex).",
        json_schema_extra={"examples": GOOGLE_MODELS, "allow_custom_input": True},
    )
@ -442,6 +499,35 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    )


+HUGGINGFACE_LLM_MODELS = [  # suggested model ids surfaced as schema examples; custom values are allowed
+    "openai/gpt-oss-120b:cerebras",
+    "deepseek-ai/DeepSeek-R1:fastest",
+    "Qwen/Qwen3-Coder-480B-A35B-Instruct:fastest",
+]
+
+
+@register_llm
+class HuggingFaceLLMConfiguration(BaseLLMConfiguration):  # Hugging Face LLM settings; member of the LLMConfig union
+    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
+    model: str = Field(  # defaults to the first entry of HUGGINGFACE_LLM_MODELS
+        default="openai/gpt-oss-120b:cerebras",
+        description="Hugging Face chat-completion model identifier, optionally with provider suffix.",
+        json_schema_extra={
+            "examples": HUGGINGFACE_LLM_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    base_url: str = Field(  # overridable; defaults to the public router URL
+        default="https://router.huggingface.co/v1",
+        description="Hugging Face OpenAI-compatible chat-completions router base URL.",
+    )
+    bill_to: str | None = Field(  # optional; None when no billing target is set
+        default=None,
+        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
+    )
+
+
 MINIMAX_MODELS = [
    "MiniMax-M2.7",
    "MiniMax-M2.7-highspeed",
@ -468,6 +554,29 @@ class MiniMaxLLMConfiguration(BaseLLMConfiguration):
    )


+@register_llm
+class SarvamLLMConfiguration(BaseLLMConfiguration):  # Sarvam chat LLM settings; member of the LLMConfig union
+    model_config = SARVAM_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
+    model: str = Field(  # examples come from SARVAM_LLM_MODELS; custom values are allowed
+        default="sarvam-30b",
+        description=(
+            "Sarvam chat model. Use sarvam-30b for low-latency voice agents; "
+            "sarvam-105b for complex multi-step reasoning."
+        ),
+        json_schema_extra={"examples": SARVAM_LLM_MODELS, "allow_custom_input": True},
+    )
+    temperature: float = Field(  # validated to the inclusive range 0.0-2.0 via ge/le
+        default=0.5,
+        ge=0.0,
+        le=2.0,
+        description=(
+            "Sampling temperature. Sarvam recommends 0.5 for balanced "
+            "conversational responses."
+        ),
+    )
+
+
 OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
 OPENAI_REALTIME_VOICES = [
    "alloy",
@ -636,12 +745,45 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
    )


+@register_service(ServiceType.REALTIME)  # registered under the REALTIME service type, not via register_llm
+class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):  # Azure OpenAI Realtime settings; member of RealtimeConfig
+    model_config = AZURE_REALTIME_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.AZURE_REALTIME] = ServiceProviders.AZURE_REALTIME
+    model: str = Field(  # holds the deployment name, per the description below
+        default="gpt-4o-realtime-preview",
+        description="Azure OpenAI realtime deployment name.",
+        json_schema_extra={
+            "examples": AZURE_REALTIME_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    endpoint: str = Field(  # no default is given, so callers must supply this field
+        description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
+    )
+    voice: str = Field(
+        default="alloy",
+        description="Voice the model speaks in.",
+        json_schema_extra={
+            "examples": AZURE_REALTIME_VOICES,
+            "allow_custom_input": True,
+        },
+    )
+    api_version: str = Field(  # examples only; allow_custom_input is not set for this field
+        default="2025-04-01-preview",
+        description="Azure OpenAI API version.",
+        json_schema_extra={
+            "examples": AZURE_REALTIME_API_VERSIONS,
+        },
+    )
+
+
 REALTIME_PROVIDERS = {
    ServiceProviders.OPENAI_REALTIME.value,
    ServiceProviders.GROK_REALTIME.value,
    ServiceProviders.ULTRAVOX_REALTIME.value,
    ServiceProviders.GOOGLE_REALTIME.value,
    ServiceProviders.GOOGLE_VERTEX_REALTIME.value,
+    ServiceProviders.AZURE_REALTIME.value,
 }


@ -656,7 +798,9 @@ LLMConfig = Annotated[
        DograhLLMService,
        AWSBedrockLLMConfiguration,
        SpeachesLLMConfiguration,
+        HuggingFaceLLMConfiguration,
        MiniMaxLLMConfiguration,
+        SarvamLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -668,6 +812,7 @@ RealtimeConfig = Annotated[
        UltravoxRealtimeLLMConfiguration,
        GoogleRealtimeLLMConfiguration,
        GoogleVertexRealtimeLLMConfiguration,
+        AzureRealtimeLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -799,6 +944,10 @@ class OpenAITTSService(BaseTTSConfiguration):
        default="alloy",
        description="OpenAI TTS voice name.",
    )
+    base_url: str = Field(
+        default="https://api.openai.com/v1",
+        description="Override only if using an OpenAI-compatible API (e.g. local TTS, proxy).",
+    )


 DOGRAH_TTS_MODELS = ["default"]
@ -816,11 +965,15 @@ class DograhTTSService(BaseTTSConfiguration):
    voice: str = Field(
        default="default",
        description="Voice preset.",
+        json_schema_extra={"allow_custom_input": True},
    )
    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")


-CARTESIA_TTS_MODELS = ["sonic-3"]
+CARTESIA_TTS_MODELS = ["sonic-3.5", "sonic-3"]  # sonic-3.5 is the CartesiaTTSConfiguration default
+INWORLD_TTS_MODELS = ["inworld-tts-2"]  # schema examples for InworldTTSConfiguration.model
+INWORLD_TTS_VOICES = ["Ashley"]  # schema examples for InworldTTSConfiguration.voice
+INWORLD_TTS_LANGUAGES = ["en-US"]  # schema examples for InworldTTSConfiguration.language


@register_tts
@ -828,7 +981,7 @@ class CartesiaTTSConfiguration(BaseTTSConfiguration):
    model_config = CARTESIA_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
    model: str = Field(
-        default="sonic-3",
+        default="sonic-3.5",
        description="Cartesia TTS model.",
        json_schema_extra={"examples": CARTESIA_TTS_MODELS},
    )
@ -843,6 +996,51 @@ class CartesiaTTSConfiguration(BaseTTSConfiguration):
        le=2.0,
        description="Volume multiplier for generated speech.",
    )
+    language: str = Field(
+        default="en",
+        description="Cartesia language code for TTS synthesis (e.g. 'en', 'tr', 'fr', 'de').",
+        json_schema_extra={"allow_custom_input": True},
+    )
+
+
+@register_tts
+class InworldTTSConfiguration(BaseTTSConfiguration):  # Inworld TTS settings; member of the TTSConfig union
+    model_config = INWORLD_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.INWORLD] = ServiceProviders.INWORLD
+    model: str = Field(
+        default="inworld-tts-2",
+        description="Inworld TTS model.",
+        json_schema_extra={"examples": INWORLD_TTS_MODELS, "allow_custom_input": True},
+    )
+    voice: str = Field(  # custom input allowed so workspace voice ids can be entered
+        default="Ashley",
+        description=(
+            "Inworld voice ID. Use Ashley for the default warm English voice, "
+            "or a workspace voice ID for a cloned/custom voice."
+        ),
+        json_schema_extra={"examples": INWORLD_TTS_VOICES, "allow_custom_input": True},
+    )
+    language: str = Field(
+        default="en-US",
+        description="BCP-47 language code for synthesis.",
+        json_schema_extra={
+            "examples": INWORLD_TTS_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    speed: float = Field(  # validated to the inclusive range 0.25-4.0 via ge/le
+        default=1.0,
+        ge=0.25,
+        le=4.0,
+        description="Speech speed multiplier.",
+    )
+    delivery_mode: Literal["STABLE", "BALANCED", "CREATIVE"] = Field(  # Literal restricts input to these three values
+        default="BALANCED",
+        description=(
+            "Controls stability versus expressiveness for inworld-tts-2 "
+            "(STABLE, BALANCED, or CREATIVE)."
+        ),
+    )


@register_tts
@ -856,9 +1054,10 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
    )
    voice: str = Field(
        default="anushka",
-        description="Sarvam voice name; must match the selected model's voice list.",
+        description="Sarvam voice name or custom voice ID.",
        json_schema_extra={
            "examples": SARVAM_V2_VOICES,
+            "allow_custom_input": True,
            "model_options": {
                "bulbul:v2": SARVAM_V2_VOICES,
                "bulbul:v3": SARVAM_V3_VOICES,
@ -870,6 +1069,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
        description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
        json_schema_extra={"examples": SARVAM_LANGUAGES},
    )
+    speed: float = Field(
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier.",
+    )


 CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
@ -989,6 +1194,90 @@ class MiniMaxTTSConfiguration(BaseTTSConfiguration):
    )


+@register_tts
+class AzureSpeechTTSConfiguration(BaseTTSConfiguration):  # Azure Speech TTS settings; member of the TTSConfig union
+    model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.AZURE_SPEECH] = ServiceProviders.AZURE_SPEECH
+    model: str = Field(  # "neural" is the only example offered
+        default="neural",
+        description="Azure Speech synthesis engine (neural voices only).",
+        json_schema_extra={"examples": ["neural"]},
+    )
+    region: str = Field(  # examples only; allow_custom_input is not set for this field
+        default="eastus",
+        description="Azure region for Speech Services (e.g. 'eastus', 'westeurope').",
+        json_schema_extra={
+            "examples": AZURE_SPEECH_REGIONS,
+        },
+    )
+    voice: str = Field(
+        default="en-US-AriaNeural",
+        description="Azure Neural voice name (e.g. 'en-US-AriaNeural').",
+        json_schema_extra={
+            "examples": AZURE_SPEECH_TTS_VOICES,
+            "allow_custom_input": True,
+        },
+    )
+    language: str = Field(
+        default="en-US",
+        description="BCP-47 language code for synthesis.",
+        json_schema_extra={
+            "examples": AZURE_SPEECH_TTS_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    speed: float = Field(  # ge/le enforce the 0.5-2.0 range stated in the description
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier (0.5 to 2.0).",
+    )
+
+
+SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(  # shared by the Smallest AI TTS and STT classes
+    "Smallest AI",
+    description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
+    provider_docs_url="https://smallest.ai/docs",
+)
+
+
+@register_tts
+class SmallestAITTSConfiguration(BaseTTSConfiguration):  # Smallest AI TTS settings; member of the TTSConfig union
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    model: str = Field(  # NOTE(review): description says 12 languages; SMALLEST_TTS_LANGUAGES lists 16 codes — confirm
+        default="lightning_v3.1",
+        description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
+        json_schema_extra={"examples": SMALLEST_TTS_MODELS},
+    )
+    voice: str = Field(  # model_options keys each model id to its own voice list
+        default="sophia",
+        description="Smallest AI voice ID. Available voices differ by model: lightning_v3.1 has a broad multilingual pool; lightning_v3.1_pro has premium American, British, and Indian accent voices (English + Hindi only).",
+        json_schema_extra={
+            "examples": list(SMALLEST_TTS_VOICES),
+            "allow_custom_input": True,
+            "model_options": {
+                "lightning_v3.1": list(SMALLEST_TTS_VOICES),
+                "lightning_v3.1_pro": list(SMALLEST_TTS_PRO_VOICES),
+            },
+        },
+    )
+    language: str = Field(  # examples passed as the tuple itself, unlike the list() copies used for voice
+        default="en",
+        description="ISO 639-1 language code for synthesis.",
+        json_schema_extra={
+            "examples": SMALLEST_TTS_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    speed: float = Field(  # ge/le enforce the 0.5-2.0 range stated in the description
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier (0.5 to 2.0).",
+    )
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@ -996,12 +1285,15 @@ TTSConfig = Annotated[
        OpenAITTSService,
        ElevenlabsTTSConfiguration,
        CartesiaTTSConfiguration,
+        InworldTTSConfiguration,
        DograhTTSService,
        SarvamTTSConfiguration,
        CambTTSConfiguration,
        RimeTTSConfiguration,
        SpeachesTTSConfiguration,
        MiniMaxTTSConfiguration,
+        AzureSpeechTTSConfiguration,
+        SmallestAITTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -1020,20 +1312,21 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration):
    )
    language: str = Field(
        default="multi",
-        description="Language code; 'multi' enables auto-detect (Nova-3 only).",
+        description=(
+            "Language code. 'multi' enables Nova-3 auto-detect and omits "
+            "language hints for Flux multilingual auto-detect."
+        ),
        json_schema_extra={
            "examples": DEEPGRAM_LANGUAGES,
            "model_options": {
                "nova-3-general": DEEPGRAM_LANGUAGES,
                "flux-general-en": ("en",),
+                "flux-general-multi": DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
            },
        },
    )


-CARTESIA_STT_MODELS = ["ink-whisper"]
-
-
@register_stt
 class CartesiaSTTConfiguration(BaseSTTConfiguration):
    model_config = CARTESIA_PROVIDER_MODEL_CONFIG
@ -1043,6 +1336,17 @@ class CartesiaSTTConfiguration(BaseSTTConfiguration):
        description="Cartesia STT model.",
        json_schema_extra={"examples": CARTESIA_STT_MODELS},
    )
+    language: str = Field(
+        default="en",
+        description="ISO 639-1 language code. ink-2 currently supports English only.",
+        json_schema_extra={
+            "examples": CARTESIA_STT_LANGUAGES,
+            "model_options": {
+                "ink-2": CARTESIA_INK_2_STT_LANGUAGES,
+                "ink-whisper": CARTESIA_INK_WHISPER_STT_LANGUAGES,
+            },
+        },
+    )


 OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
@ -1057,6 +1361,10 @@ class OpenAISTTConfiguration(BaseSTTConfiguration):
        description="OpenAI transcription model.",
        json_schema_extra={"examples": OPENAI_STT_MODELS},
    )
+    base_url: str = Field(
+        default="https://api.openai.com/v1",
+        description="Override only if using an OpenAI-compatible API (e.g. local STT, proxy).",
+    )


@register_stt
@ -1101,6 +1409,10 @@ class GoogleSTTConfiguration(BaseSTTConfiguration):
 # Dograh STT Service
 DOGRAH_STT_MODELS = ["default"]
 DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES
+# Languages auto-detected when the Dograh STT language is "multi". Dograh STT runs
+# Deepgram Flux multilingual under the hood, which only auto-detects this subset —
+# not the full DOGRAH_STT_LANGUAGES list offered for explicit single-language selection.
+DOGRAH_MULTILINGUAL_AUTODETECT_LANGUAGES = DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES


@register_stt
@ -1125,13 +1437,24 @@ class SarvamSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
    model: str = Field(
        default="saarika:v2.5",
-        description="Sarvam STT model.",
+        description=(
+            "Sarvam STT model. saarika:v2.5 transcribes in the spoken language; "
+            "saaras:v3 is the recommended model with flexible output modes."
+        ),
        json_schema_extra={"examples": SARVAM_STT_MODELS},
    )
    language: str = Field(
-        default="hi-IN",
-        description="BCP-47 Indian-language code.",
-        json_schema_extra={"examples": SARVAM_LANGUAGES},
+        default="unknown",
+        description=(
+            "BCP-47 language code. Use unknown for automatic language detection."
+        ),
+        json_schema_extra={
+            "examples": SARVAM_STT_LANGUAGES_V25,
+            "model_options": {
+                "saarika:v2.5": SARVAM_STT_LANGUAGES_V25,
+                "saaras:v3": SARVAM_STT_LANGUAGES_V3,
+            },
+        },
    )


@ -1187,6 +1510,38 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    )


+HUGGINGFACE_STT_MODELS = [  # suggested ASR model ids surfaced as schema examples
+    "openai/whisper-large-v3-turbo",
+    "openai/whisper-large-v3",
+]
+
+
+@register_stt
+class HuggingFaceSTTConfiguration(BaseSTTConfiguration):  # Hugging Face STT settings; member of the STTConfig union
+    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
+    model: str = Field(  # defaults to the first entry of HUGGINGFACE_STT_MODELS
+        default="openai/whisper-large-v3-turbo",
+        description="Hugging Face ASR model identifier served through Inference Providers.",
+        json_schema_extra={
+            "examples": HUGGINGFACE_STT_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    base_url: str = Field(  # note: path is /hf-inference here, whereas the LLM config defaults to /v1
+        default="https://router.huggingface.co/hf-inference",
+        description="Hugging Face Inference Providers router base URL.",
+    )
+    bill_to: str | None = Field(  # optional; None when no billing target is set
+        default=None,
+        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
+    )
+    return_timestamps: bool = Field(  # off by default
+        default=False,
+        description="Request timestamp chunks when supported by the selected provider/model.",
+    )
+
+
 ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
 ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]

@ -1223,6 +1578,88 @@ class GladiaSTTConfiguration(BaseSTTConfiguration):
    )


+@register_stt
+class AzureSpeechSTTConfiguration(BaseSTTConfiguration):  # Azure Speech STT settings; member of the STTConfig union
+    model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.AZURE_SPEECH] = ServiceProviders.AZURE_SPEECH
+    model: str = Field(  # examples are given inline rather than through a named constant
+        default="latest_long",
+        description="Azure Speech recognition model (use 'latest_long' for continuous recognition).",
+        json_schema_extra={"examples": ["latest_long", "latest_short"]},
+    )
+    region: str = Field(  # examples only; allow_custom_input is not set for this field
+        default="eastus",
+        description="Azure region for Speech Services (e.g. 'eastus', 'westeurope').",
+        json_schema_extra={
+            "examples": AZURE_SPEECH_REGIONS,
+        },
+    )
+    language: str = Field(
+        default="en-US",
+        description="BCP-47 language code for recognition.",
+        json_schema_extra={
+            "examples": AZURE_SPEECH_STT_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+
+
+SMALLEST_STT_MODELS = ["pulse"]  # schema examples for SmallestAISTTConfiguration.model
+SMALLEST_STT_LANGUAGES = [  # two-letter codes suggested for transcription; 32 entries
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "pt",
+    "bn",
+    "gu",
+    "kn",
+    "ml",
+    "mr",
+    "ta",
+    "te",
+    "pa",
+    "or",
+    "bg",
+    "cs",
+    "da",
+    "et",
+    "fi",
+    "hu",
+    "lt",
+    "lv",
+    "mt",
+    "ro",
+    "sk",
+    "sv",
+    "uk",
+]
+
+
+@register_stt
+class SmallestAISTTConfiguration(BaseSTTConfiguration):  # Smallest AI STT settings; member of the STTConfig union
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    model: str = Field(  # NOTE(review): description says 38 languages; SMALLEST_STT_LANGUAGES lists 32 — confirm
+        default="pulse",
+        description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
+        json_schema_extra={"examples": SMALLEST_STT_MODELS},
+    )
+    language: str = Field(  # custom input allowed, so codes outside the example list can be entered
+        default="en",
+        description="ISO 639-1 language code for transcription.",
+        json_schema_extra={
+            "examples": SMALLEST_STT_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+
+
 STTConfig = Annotated[
    Union[
        DeepgramSTTConfiguration,
@ -1233,8 +1670,11 @@ STTConfig = Annotated[
        SpeechmaticsSTTConfiguration,
        SarvamSTTConfiguration,
        SpeachesSTTConfiguration,
+        HuggingFaceSTTConfiguration,
        AssemblyAISTTConfiguration,
        GladiaSTTConfiguration,
+        AzureSpeechSTTConfiguration,
+        SmallestAISTTConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -1274,8 +1714,51 @@ class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
    )


+@register_embeddings
+class AzureOpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):  # Azure OpenAI embeddings; member of EmbeddingsConfig
+    model_config = AZURE_OPENAI_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
+    model: str = Field(  # holds the deployment name, per the description below
+        default="text-embedding-3-small",
+        description=(
+            "Azure OpenAI embedding deployment name. The deployment must return "
+            "1536-dimensional embeddings."
+        ),
+        json_schema_extra={
+            "examples": AZURE_EMBEDDING_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    endpoint: str = Field(  # no default is given, so callers must supply this field
+        description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
+    )
+    api_version: str = Field(  # plain string with no examples attached
+        default="2024-02-15-preview",
+        description="Azure OpenAI API version for embeddings.",
+    )
+
+
+DOGRAH_EMBEDDING_MODELS = ["dograh_embedding_v1"]  # schema examples for DograhEmbeddingsConfiguration.model
+
+
+@register_embeddings
+class DograhEmbeddingsConfiguration(BaseEmbeddingsConfiguration):  # Dograh embeddings; member of EmbeddingsConfig
+    model_config = DOGRAH_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
+    model: str = Field(  # examples only; allow_custom_input is not set for this field
+        default="dograh_embedding_v1",
+        description="Dograh-managed embedding model.",
+        json_schema_extra={"examples": DOGRAH_EMBEDDING_MODELS},
+    )
+
+
 EmbeddingsConfig = Annotated[
-    Union[OpenAIEmbeddingsConfiguration, OpenRouterEmbeddingsConfiguration],
+    Union[
+        OpenAIEmbeddingsConfiguration,
+        OpenRouterEmbeddingsConfiguration,
+        AzureOpenAIEmbeddingsConfiguration,
+        DograhEmbeddingsConfiguration,
+    ],
    Field(discriminator="provider"),
 ]

--- a/api/services/configuration/resolve.py
+++ b/api/services/configuration/resolve.py
@ -2,13 +2,15 @@

 from __future__ import annotations

-from api.schemas.user_configuration import UserConfiguration
+import copy
+
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
 from api.services.configuration.registry import (
    REGISTRY,
    ServiceType,
 )

-# Maps override key → (UserConfiguration field, ServiceType for registry lookup)
+# Maps override key (also the EffectiveAIModelConfiguration attribute name) → ServiceType for registry lookup
 _SECTION_MAP: dict[str, ServiceType] = {
    "llm": ServiceType.LLM,
    "tts": ServiceType.TTS,
@ -29,10 +31,52 @@ def _build_section_from_override(service_type: ServiceType, override: dict):
    return config_cls(**override)


+_SECRET_FIELDS = ("api_key", "credentials", "aws_access_key", "aws_secret_key")  # copied only when the override lacks them
+
+
+def enrich_overrides_with_api_keys(
+    model_overrides: dict,
+    user_config: EffectiveAIModelConfiguration,
+) -> dict:
+    """Return a deep copy of model_overrides with missing secrets filled in.
+
+    For each section whose override names the same provider as user_config,
+    copy every _SECRET_FIELDS value the override lacks from the global section.
+    Stamping secrets at save time keeps the override self-contained if the
+    global config later switches provider. Sections that are absent, not a
+    dict, or on a different provider are left as-is; the input is not mutated.
+    """
+    result = copy.deepcopy(model_overrides)
+    for section_key in _SECTION_MAP:
+        override = result.get(section_key)
+        if not isinstance(override, dict):
+            continue  # absent, null, or malformed section: nothing to enrich
+        override_provider = override.get("provider")
+        if not override_provider:
+            continue
+        global_section = getattr(user_config, section_key, None)
+        if global_section is None:
+            continue
+        if getattr(global_section, "provider", None) != override_provider:
+            continue  # secrets belong to another provider; never copy them across
+        for field in _SECRET_FIELDS:
+            if override.get(field):
+                continue  # the override already carries its own value
+            if field == "api_key" and hasattr(global_section, "get_all_api_keys"):
+                all_keys = global_section.get_all_api_keys()
+                if all_keys:  # one key is stored as a scalar, several as the list
+                    override[field] = all_keys[0] if len(all_keys) == 1 else all_keys
+            else:
+                global_value = getattr(global_section, field, None)
+                if global_value is not None:
+                    override[field] = global_value
+    return result
+
+
 def resolve_effective_config(
-    user_config: UserConfiguration,
+    user_config: EffectiveAIModelConfiguration,
    model_overrides: dict | None,
-) -> UserConfiguration:
+) -> EffectiveAIModelConfiguration:
    """Deep-merge workflow model_overrides onto global user config.

    - If model_overrides is None or empty, returns a copy of user_config unchanged.
--- a/api/services/filesystem/s3.py
+++ b/api/services/filesystem/s3.py
@ -1,6 +1,7 @@
 from typing import Any, BinaryIO, Dict, Optional

 import aioboto3
+from botocore.config import Config
 from botocore.exceptions import ClientError

 from .base import BaseFileSystem
@ -9,22 +10,56 @@ from .base import BaseFileSystem
 class S3FileSystem(BaseFileSystem):
    """S3 implementation of the filesystem interface."""

-    def __init__(self, bucket_name: str, region_name: str = "us-east-1"):
+    def __init__(
+        self,
+        bucket_name: str,
+        region_name: str = "us-east-1",
+        endpoint_url: Optional[str] = None,
+        signature_version: Optional[str] = None,
+        addressing_style: Optional[str] = None,
+    ):
        """Initialize S3 filesystem.

        Args:
            bucket_name: Name of the S3 bucket
            region_name: AWS region name
+            endpoint_url: Optional custom S3 endpoint (e.g. for MinIO/rustfs).
+                ``None`` uses AWS's default endpoint resolution.
+            signature_version: Optional botocore signature version (e.g.
+                ``"s3v4"``). ``None`` keeps botocore's default signing behavior.
+            addressing_style: Optional S3 addressing style (``"path"`` /
+                ``"virtual"`` / ``"auto"``). ``None`` keeps botocore's default.
        """
        self.bucket_name = bucket_name
        self.region_name = region_name
+        self.endpoint_url = endpoint_url
        self.session = aioboto3.Session()

+        # Build a botocore Config only when an override is requested so that the
+        # default behavior is byte-for-byte unchanged when no env vars are set.
+        config_kwargs: Dict[str, Any] = {}
+        if signature_version:
+            config_kwargs["signature_version"] = signature_version
+        if addressing_style:
+            config_kwargs["s3"] = {"addressing_style": addressing_style}
+        self._config = Config(**config_kwargs) if config_kwargs else None
+
+    def _client_kwargs(self) -> Dict[str, Any]:
+        """Assemble the keyword arguments for ``session.client("s3", ...)``.
+
+        ``region_name`` is always included. ``endpoint_url`` is added only when
+        it is truthy and ``config`` only when a botocore ``Config`` was built, so
+        an instance without overrides yields just the region.
+        """
+        client_args: Dict[str, Any] = {"region_name": self.region_name}
+
+        custom_endpoint = self.endpoint_url
+        if custom_endpoint:
+            client_args.update(endpoint_url=custom_endpoint)
+
+        botocore_config = self._config
+        if botocore_config is not None:
+            client_args.update(config=botocore_config)
+
+        return client_args
+
    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                await s3_client.put_object(
                    Bucket=self.bucket_name, Key=file_path, Body=await content.read()
                )
@ -34,9 +69,7 @@ class S3FileSystem(BaseFileSystem):

    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                await s3_client.upload_file(
                    local_path, self.bucket_name, destination_path
                )
@ -59,9 +92,7 @@ class S3FileSystem(BaseFileSystem):
        disposition on the response.
        """
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                params = {"Bucket": self.bucket_name, "Key": file_path}

                # Make artifacts viewable inline in the browser when requested
@ -100,9 +131,7 @@ class S3FileSystem(BaseFileSystem):
    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Get S3 object metadata."""
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                response = await s3_client.head_object(
                    Bucket=self.bucket_name, Key=file_path
                )
@ -126,9 +155,7 @@ class S3FileSystem(BaseFileSystem):
    ) -> Optional[str]:
        """Generate a presigned PUT URL for direct file upload."""
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                url = await s3_client.generate_presigned_url(
                    "put_object",
                    Params={
@ -145,9 +172,7 @@ class S3FileSystem(BaseFileSystem):
    async def adownload_file(self, source_path: str, local_path: str) -> bool:
        """Download a file from S3 to local path."""
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                await s3_client.download_file(self.bucket_name, source_path, local_path)
            return True
        except ClientError:
@ -156,9 +181,7 @@ class S3FileSystem(BaseFileSystem):
    async def acopy_file(self, source_path: str, destination_path: str) -> bool:
        """Copy a file within S3 (server-side copy)."""
        try:
-            async with self.session.client(
-                "s3", region_name=self.region_name
-            ) as s3_client:
+            async with self.session.client("s3", **self._client_kwargs()) as s3_client:
                await s3_client.copy_object(
                    Bucket=self.bucket_name,
                    Key=destination_path,
--- a/api/services/gen_ai/init.py
+++ b/api/services/gen_ai/init.py
@ -1,15 +1,25 @@
 """Generative AI services for embeddings and document processing."""

 from .embedding import (
+    AzureEmbeddingAPIKeyNotConfiguredError,
+    AzureOpenAIEmbeddingService,
    BaseEmbeddingService,
+    DograhEmbeddingService,
    EmbeddingAPIKeyNotConfiguredError,
    OpenAIEmbeddingService,
+    build_embedding_service,
+    resolve_embedding_correlation_id,
 )
 from .json_parser import parse_llm_json

 __all__ = [
+    "AzureEmbeddingAPIKeyNotConfiguredError",
+    "AzureOpenAIEmbeddingService",
    "BaseEmbeddingService",
+    "DograhEmbeddingService",
    "EmbeddingAPIKeyNotConfiguredError",
    "OpenAIEmbeddingService",
+    "build_embedding_service",
+    "resolve_embedding_correlation_id",
    "parse_llm_json",
 ]
--- a/api/services/gen_ai/embedding/init.py
+++ b/api/services/gen_ai/embedding/init.py
@ -1,10 +1,21 @@
 """Embedding services for document processing and retrieval."""

+from .azure_openai_service import (
+    AzureEmbeddingAPIKeyNotConfiguredError,
+    AzureOpenAIEmbeddingService,
+)
 from .base import BaseEmbeddingService
+from .dograh_service import DograhEmbeddingService
+from .factory import build_embedding_service, resolve_embedding_correlation_id
 from .openai_service import EmbeddingAPIKeyNotConfiguredError, OpenAIEmbeddingService

 __all__ = [
+    "AzureEmbeddingAPIKeyNotConfiguredError",
+    "AzureOpenAIEmbeddingService",
    "BaseEmbeddingService",
+    "DograhEmbeddingService",
    "EmbeddingAPIKeyNotConfiguredError",
    "OpenAIEmbeddingService",
+    "build_embedding_service",
+    "resolve_embedding_correlation_id",
 ]
--- a/api/services/gen_ai/embedding/azure_openai_service.py
+++ b/api/services/gen_ai/embedding/azure_openai_service.py
@ -0,0 +1,131 @@
+"""Azure OpenAI embedding service.
+
+Uses the Azure OpenAI REST API for text embeddings, compatible with
+1536-dimensional embedding deployments such as text-embedding-3-small and
+text-embedding-ada-002.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from loguru import logger
+from openai import AsyncAzureOpenAI
+
+from api.db.db_client import DBClient
+from api.utils.url_security import validate_user_configured_service_url
+
+from .base import BaseEmbeddingService
+
+DEFAULT_MODEL_ID = "text-embedding-3-small"
+EMBEDDING_DIMENSION = 1536
+
+
+class AzureEmbeddingAPIKeyNotConfiguredError(Exception):
+    """Signals that embeddings were requested without Azure OpenAI credentials."""
+
+    def __init__(self):
+        # Fixed, user-facing message; the exception takes no arguments.
+        message = (
+            "Azure OpenAI endpoint or API key not configured. Please set your "
+            "endpoint and API key in Model Configurations > Embedding to use "
+            "document processing."
+        )
+        super().__init__(message)
+
+
+class AzureOpenAIEmbeddingService(BaseEmbeddingService):
+    """Embedding client backed by an Azure OpenAI text-embedding deployment."""
+
+    def __init__(
+        self,
+        db_client: DBClient,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        model_id: str = DEFAULT_MODEL_ID,
+        api_version: str = "2024-02-15-preview",
+    ):
+        """Set up the service, creating a client only when credentials are present.
+
+        Args:
+            db_client: Database client used for vector similarity search.
+            api_key: Azure OpenAI API key.
+            endpoint: Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).
+            model_id: Deployment name; it is also sent as the model identifier.
+            api_version: Azure OpenAI API version.
+        """
+        self.db = db_client
+        self.model_id = model_id
+        self._configured = bool(api_key and endpoint)
+
+        if not self._configured:
+            # Construction still succeeds; _ensure_configured() rejects later calls.
+            self.client = None
+            logger.warning(
+                "Azure OpenAI embedding service initialized without credentials. "
+                "Operations will fail until endpoint and API key are configured."
+            )
+            return
+
+        # The endpoint is validated before a client is created for it.
+        validate_user_configured_service_url(endpoint, field_name="endpoint")
+        self.client = AsyncAzureOpenAI(
+            api_key=api_key,
+            azure_endpoint=endpoint,
+            api_version=api_version,
+        )
+        logger.info(
+            f"Azure OpenAI embedding service initialized with deployment: {model_id}"
+        )
+
+    def get_model_id(self) -> str:
+        """Return the deployment/model id this service was created with."""
+        return self.model_id
+
+    def get_embedding_dimension(self) -> int:
+        """Return the fixed embedding size this service accepts."""
+        return EMBEDDING_DIMENSION
+
+    def _ensure_configured(self):
+        """Raise ``AzureEmbeddingAPIKeyNotConfiguredError`` unless a client exists."""
+        if self._configured and self.client is not None:
+            return
+        raise AzureEmbeddingAPIKeyNotConfiguredError()
+
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
+        """Embed a batch of texts with the Azure OpenAI embeddings API.
+
+        Any failure, including a dimension mismatch, is logged and re-raised.
+        """
+        self._ensure_configured()
+        try:
+            api_response = await self.client.embeddings.create(
+                input=texts,
+                model=self.model_id,
+            )
+            vectors = []
+            for record in api_response.data:
+                vectors.append(record.embedding)
+            self._validate_embedding_dimensions(vectors)
+            return vectors
+        except Exception as e:
+            logger.error(f"Error generating Azure OpenAI embeddings: {e}")
+            raise
+
+    def _validate_embedding_dimensions(self, embeddings: List[List[float]]) -> None:
+        """Raise ``ValueError`` for the first vector whose length is unexpected."""
+        unexpected_size = next(
+            (len(vector) for vector in embeddings if len(vector) != EMBEDDING_DIMENSION),
+            None,
+        )
+        if unexpected_size is None:
+            return
+        raise ValueError(
+            "Azure OpenAI embedding deployment "
+            f"{self.model_id!r} returned {unexpected_size} dimensions; "
+            "Dograh knowledge base storage currently supports "
+            f"{EMBEDDING_DIMENSION}-dimensional embeddings."
+        )
+
+    async def embed_query(self, query: str) -> List[float]:
+        """Embed one query string and return its single vector."""
+        self._ensure_configured()
+        return (await self.embed_texts([query]))[0]
+
+    async def search_similar_chunks(
+        self,
+        query: str,
+        organization_id: int,
+        limit: int = 5,
+        document_uuids: Optional[List[str]] = None,
+    ) -> List[Dict[str, Any]]:
+        """Embed ``query`` and delegate the vector similarity search to the DB client."""
+        self._ensure_configured()
+        query_vector = await self.embed_query(query)
+        search_args: Dict[str, Any] = {
+            "query_embedding": query_vector,
+            "organization_id": organization_id,
+            "limit": limit,
+            "document_uuids": document_uuids,
+            # Passed through so the DB client knows which model produced the vector.
+            "embedding_model": self.model_id,
+        }
+        return await self.db.search_similar_chunks(**search_args)
--- a/api/services/gen_ai/embedding/dograh_service.py
+++ b/api/services/gen_ai/embedding/dograh_service.py
@ -0,0 +1,69 @@
+"""Dograh-managed embedding service.
+
+Routes embeddings through Dograh's managed proxy (MPS). This mirrors the managed
+voice services (``DograhLLMService`` / ``DograhTTSService``): when a server-minted
+MPS correlation id is present, it forwards the MPS billing v2 protocol
+(``correlation_id`` + ``mps_billing_version``) in the request body so MPS can
+authorize and attribute the call. With no correlation id (e.g. a v1 org) it
+behaves like a plain OpenAI-compatible call, which MPS accepts.
+
+Keeping this in a subclass keeps ``OpenAIEmbeddingService`` a generic
+OpenAI-compatible client; only the managed path carries MPS-specific metadata,
+so BYOK OpenAI/Azure requests never ship MPS fields to the real provider.
+"""
+
+from typing import Any, Dict, Optional
+
+from api.db.db_client import DBClient
+
+from .openai_service import DEFAULT_MODEL_ID, OpenAIEmbeddingService
+
+# Protocol contract with MPS (see model_services
+# api/services/model_service_correlations.py). Kept local to avoid coupling the
+# app layer to the pipecat package, which defines its own copy for voice.
+MPS_BILLING_VERSION_KEY = "mps_billing_version"
+MPS_BILLING_VERSION_V2 = "2"
+
+
+class DograhEmbeddingService(OpenAIEmbeddingService):
+    """OpenAI-compatible embedding client aimed at Dograh's managed proxy (MPS)."""
+
+    def __init__(
+        self,
+        db_client: DBClient,
+        api_key: Optional[str] = None,
+        model_id: str = DEFAULT_MODEL_ID,
+        base_url: Optional[str] = None,
+        correlation_id: Optional[str] = None,
+    ):
+        """Create the managed embedding service.
+
+        Args:
+            db_client: Database client for vector similarity search.
+            api_key: Dograh-managed MPS service key.
+            model_id: Embedding model/tier id (default: text-embedding-3-small).
+            base_url: MPS embeddings base URL.
+            correlation_id: Server-minted MPS correlation id. If provided, each
+                request carries the MPS billing v2 protocol; if None, requests
+                go out without it (valid for v1 orgs).
+        """
+        parent_args: Dict[str, Any] = {
+            "db_client": db_client,
+            "api_key": api_key,
+            "model_id": model_id,
+            "base_url": base_url,
+        }
+        super().__init__(**parent_args)
+        self._correlation_id = correlation_id
+
+    def _request_kwargs(self) -> Dict[str, Any]:
+        """Return the MPS billing v2 payload, or nothing without a correlation id."""
+        cid = self._correlation_id
+        if cid:
+            billing_metadata = {
+                "correlation_id": cid,
+                MPS_BILLING_VERSION_KEY: MPS_BILLING_VERSION_V2,
+            }
+            return {"extra_body": {"metadata": billing_metadata}}
+        return {}
--- a/api/services/gen_ai/embedding/factory.py
+++ b/api/services/gen_ai/embedding/factory.py
@ -0,0 +1,137 @@
+"""Factory for embedding services, including the Dograh-managed (MPS) path.
+
+Centralizes the provider branching (Azure BYOK / Dograh-managed / OpenAI-compatible
+BYOK) that was previously duplicated across document ingestion, the search route,
+and the RAG tool, and resolves the MPS billing v2 protocol the same way the voice
+path does: attach it only for orgs already on v2, and never create a billing
+account to do so.
+"""
+
+from typing import Optional
+
+from loguru import logger
+
+from api.db.db_client import DBClient
+
+from .azure_openai_service import AzureOpenAIEmbeddingService
+from .base import BaseEmbeddingService
+from .dograh_service import DograhEmbeddingService
+from .openai_service import OpenAIEmbeddingService
+
+DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
+DEFAULT_AZURE_API_VERSION = "2024-02-15-preview"
+
+
+async def resolve_embedding_correlation_id(
+    *,
+    organization_id: Optional[int],
+    service_key: Optional[str],
+    created_by: Optional[str] = None,
+) -> Optional[str]:
+    """Obtain an MPS correlation id for a managed embedding call made outside a run.
+
+    Gating, mirroring the voice path:
+
+    - No ``service_key``: returns ``None`` immediately.
+    - OSS deployments use a pasted hosted v2 key (v2 by definition), so the id is
+      minted directly via the bearer endpoint — matching
+      ``_authorize_oss_managed_v2_correlation``.
+    - Hosted/SaaS: the org's billing mode is read (no side effects) and an id is
+      minted only when the mode is already v2. Minting for an already-v2 org is
+      a no-op on the account.
+
+    ``None`` means the call should be sent without the protocol; MPS accepts
+    un-gated embedding calls from v1 orgs. A v2 billing account is never created
+    here. Any exception raised while resolving is logged as a warning and also
+    results in ``None``.
+    """
+    if not service_key:
+        return None
+
+    # Deferred imports: avoids import-time cycles between the gen_ai and service
+    # layers (same inline-import convention used elsewhere in the app).
+    from api.constants import DEPLOYMENT_MODE
+    from api.services.mps_service_key_client import mps_service_key_client
+
+    try:
+        if DEPLOYMENT_MODE != "oss":
+            # Hosted path: only orgs already on billing v2 get a correlation id.
+            if organization_id is None:
+                return None
+
+            billing_status = await mps_service_key_client.get_billing_account_status(
+                organization_id, created_by=created_by
+            )
+            if not (billing_status and billing_status.get("billing_mode") == "v2"):
+                return None
+
+        # Shared by the OSS path and the hosted-v2 path.
+        minted = await mps_service_key_client.create_correlation_id(
+            service_key=service_key
+        )
+        return minted.get("correlation_id")
+    except Exception as e:
+        logger.warning(
+            "Could not resolve MPS correlation id for managed embeddings; "
+            "sending without v2 protocol: {}",
+            e,
+        )
+        return None
+
+
+async def build_embedding_service(
+    *,
+    db_client: DBClient,
+    provider: Optional[str],
+    api_key: Optional[str],
+    model: Optional[str],
+    base_url: Optional[str] = None,
+    endpoint: Optional[str] = None,
+    api_version: Optional[str] = None,
+    correlation_id: Optional[str] = None,
+    organization_id: Optional[int] = None,
+    created_by: Optional[str] = None,
+    resolve_correlation: bool = False,
+) -> BaseEmbeddingService:
+    """Construct the right embedding service for a provider/config.
+
+    Provider selection:
+
+    - Azure with a truthy ``endpoint``: ``AzureOpenAIEmbeddingService``.
+    - Dograh: ``DograhEmbeddingService``, optionally with a resolved
+      correlation id.
+    - Anything else: ``OpenAIEmbeddingService``. This includes Azure without an
+      ``endpoint``; that case logs a warning because the Azure credentials are
+      then handed to the OpenAI-compatible client.
+
+    Args:
+        db_client: Database client passed to the constructed service.
+        provider: Provider identifier compared against ``ServiceProviders`` values.
+        api_key: API key (or Dograh service key) passed to the service.
+        model: Embedding model/deployment id; ``DEFAULT_EMBEDDING_MODEL`` is used
+            when this is falsy.
+        base_url: Base URL forwarded to the Dograh and OpenAI-compatible services.
+        endpoint: Azure OpenAI resource endpoint; only used for the Azure provider.
+        api_version: Azure API version; ``DEFAULT_AZURE_API_VERSION`` is used when
+            this is falsy.
+        correlation_id: A correlation id already available in context (e.g. the
+            running workflow's MPS correlation id). Used for the Dograh provider.
+        organization_id: Forwarded to ``resolve_embedding_correlation_id``.
+        created_by: Forwarded to ``resolve_embedding_correlation_id``.
+        resolve_correlation: When True and no ``correlation_id`` is supplied, resolve
+            one for the Dograh provider via ``resolve_embedding_correlation_id``
+            (for calls made outside a workflow run: ingestion, manual search).
+
+    Returns:
+        The embedding service instance for the selected provider.
+    """
+    from api.services.configuration.registry import ServiceProviders
+
+    model_id = model or DEFAULT_EMBEDDING_MODEL
+
+    if provider == ServiceProviders.AZURE.value:
+        if endpoint:
+            return AzureOpenAIEmbeddingService(
+                db_client=db_client,
+                api_key=api_key,
+                endpoint=endpoint,
+                model_id=model_id,
+                api_version=api_version or DEFAULT_AZURE_API_VERSION,
+            )
+        # Keep the existing fallback, but make the misconfiguration visible
+        # instead of silently switching clients.
+        logger.warning(
+            "Azure embedding provider selected without an endpoint; "
+            "falling back to the OpenAI-compatible embedding client."
+        )
+
+    if provider == ServiceProviders.DOGRAH.value:
+        cid = correlation_id
+        if cid is None and resolve_correlation:
+            cid = await resolve_embedding_correlation_id(
+                organization_id=organization_id,
+                service_key=api_key,
+                created_by=created_by,
+            )
+        return DograhEmbeddingService(
+            db_client=db_client,
+            api_key=api_key,
+            model_id=model_id,
+            base_url=base_url,
+            correlation_id=cid,
+        )
+
+    return OpenAIEmbeddingService(
+        db_client=db_client,
+        api_key=api_key,
+        model_id=model_id,
+        base_url=base_url,
+    )
--- a/api/services/gen_ai/embedding/openai_service.py
+++ b/api/services/gen_ai/embedding/openai_service.py
@ -11,6 +11,7 @@ from loguru import logger
 from openai import AsyncOpenAI

 from api.db.db_client import DBClient
+from api.utils.url_security import validate_user_configured_service_url

 from .base import BaseEmbeddingService

@ -37,6 +38,7 @@ class OpenAIEmbeddingService(BaseEmbeddingService):
        api_key: Optional[str] = None,
        model_id: str = DEFAULT_MODEL_ID,
        base_url: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
    ):
        """Initialize the OpenAI embedding service.

@ -54,7 +56,13 @@ class OpenAIEmbeddingService(BaseEmbeddingService):
        if self._api_key_configured:
            client_kwargs = {"api_key": api_key}
            if base_url:
+                validate_user_configured_service_url(
+                    base_url,
+                    field_name="base_url",
+                )
                client_kwargs["base_url"] = base_url
+            if default_headers:
+                client_kwargs["default_headers"] = default_headers
            self.client = AsyncOpenAI(**client_kwargs)
            logger.info(f"OpenAI embedding service initialized with model: {model_id}")
        else:
@ -77,6 +85,14 @@ class OpenAIEmbeddingService(BaseEmbeddingService):
        if not self._api_key_configured or self.client is None:
            raise EmbeddingAPIKeyNotConfiguredError()

+    def _request_kwargs(self) -> Dict[str, Any]:
+        """Extra kwargs merged into every embeddings.create() call.
+
+        Override hook for subclasses (e.g. DograhEmbeddingService injects the MPS
+        billing protocol here). The base service adds nothing.
+        """
+        return {}
+
    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts using OpenAI API.

@ -89,6 +105,7 @@ class OpenAIEmbeddingService(BaseEmbeddingService):
            response = await self.client.embeddings.create(
                input=texts,
                model=self.model_id,
+                **self._request_kwargs(),
            )
            return [item.embedding for item in response.data]
        except Exception as e:
--- a/api/services/integrations/tuner/node.py
+++ b/api/services/integrations/tuner/node.py
@ -40,10 +40,7 @@ from api.services.workflow.node_specs.model_spec import (
        )
    ],
    graph_constraints=GraphConstraints(
-        min_incoming=0,
-        max_incoming=0,
-        min_outgoing=0,
-        max_outgoing=0,
+        min_incoming=0, max_incoming=0, min_outgoing=0, max_outgoing=0, max_instances=1
    ),
    property_order=(
        "name",
--- a/api/services/managed_model_services.py
+++ b/api/services/managed_model_services.py
@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import Any
+
+from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
+from api.services.configuration.registry import ServiceProviders
+
+MPS_CORRELATION_ID_CONTEXT_KEY = "mps_correlation_id"
+
+
+def uses_managed_model_services_v2(
+    ai_model_config: EffectiveAIModelConfiguration | None,
+) -> bool:
+    """Tell whether the config is on managed service version 2 and uses Dograh.
+
+    True only when ``managed_service_version`` equals 2 and at least one of the
+    ``llm`` / ``tts`` / ``stt`` / ``embeddings`` sections has the Dograh provider.
+    """
+    if ai_model_config is None:
+        return False
+    if getattr(ai_model_config, "managed_service_version", None) != 2:
+        return False
+
+    for section_name in ("llm", "tts", "stt", "embeddings"):
+        if _is_dograh_service(getattr(ai_model_config, section_name, None)):
+            return True
+    return False
+
+
+def get_mps_correlation_id(initial_context: dict[str, Any] | None) -> str | None:
+    """Read the MPS correlation id from a run's initial context, as a string.
+
+    Returns ``None`` when the context is missing/empty or holds no value under
+    ``MPS_CORRELATION_ID_CONTEXT_KEY``.
+    """
+    if not initial_context:
+        return None
+    stored = initial_context.get(MPS_CORRELATION_ID_CONTEXT_KEY)
+    return None if stored is None else str(stored)
+
+
+async def ensure_mps_correlation_id(
+    *,
+    ai_model_config: EffectiveAIModelConfiguration,
+    workflow_run_id: int,
+    initial_context: dict[str, Any] | None,
+) -> str | None:
+    """Return the run's correlation id, insisting on one for managed v2 configs.
+
+    Returns:
+        The id found in ``initial_context`` when it is non-empty, otherwise
+        ``None`` for configs that do not use managed model services v2.
+
+    Raises:
+        ValueError: The config uses managed model services v2 but the context
+            carries no correlation id.
+    """
+    correlation_id = get_mps_correlation_id(initial_context)
+    if correlation_id:
+        return correlation_id
+
+    if uses_managed_model_services_v2(ai_model_config):
+        raise ValueError(
+            "Managed model services v2 requires workflow run authorization before "
+            f"the run starts. Missing correlation id for workflow_run_id={workflow_run_id}."
+        )
+
+    return None
+
+
+def _is_dograh_service(service: Any) -> bool:
+    """Check whether ``service.provider`` is Dograh, as enum member or raw value.
+
+    Objects without a ``provider`` attribute (including ``None``) are treated as
+    having provider ``None``.
+    """
+    provider = getattr(service, "provider", None)
+    if provider == ServiceProviders.DOGRAH:
+        return True
+    return provider == ServiceProviders.DOGRAH.value
+
+
+def get_dograh_service_api_key(
+    ai_model_config: EffectiveAIModelConfiguration,
+) -> str | None:
+    """Find the first usable API key on a Dograh-provided section.
+
+    Sections are checked in the order ``llm``, ``tts``, ``stt``, ``embeddings``.
+    For each Dograh section, the first entry of ``get_all_api_keys()`` wins when
+    that method exists and returns something; otherwise a non-empty string
+    ``api_key`` attribute is used. Returns ``None`` when nothing is found.
+    """
+    for section_name in ("llm", "tts", "stt", "embeddings"):
+        section = getattr(ai_model_config, section_name, None)
+        if _is_dograh_service(section):
+            if hasattr(section, "get_all_api_keys"):
+                configured_keys = section.get_all_api_keys()
+                if configured_keys:
+                    return configured_keys[0]
+
+            single_key = getattr(section, "api_key", None)
+            if isinstance(single_key, str) and single_key:
+                return single_key
+
+    return None
--- a/api/services/mps_billing.py
+++ b/api/services/mps_billing.py
@ -0,0 +1,23 @@
+from typing import Optional
+
+from api.constants import DEPLOYMENT_MODE
+from api.services.mps_service_key_client import mps_service_key_client
+
+
+async def ensure_hosted_mps_billing_account_v2(
+    organization_id: int,
+    *,
+    created_by: Optional[str] = None,
+) -> Optional[dict]:
+    """Make sure a hosted org has an MPS billing v2 account.
+
+    OSS deployments use legacy per-key quota accounting and do not create MPS
+    billing accounts, so ``None`` is returned for them without contacting MPS.
+    Otherwise the result of ``ensure_billing_account_v2`` is returned.
+    """
+    if DEPLOYMENT_MODE != "oss":
+        return await mps_service_key_client.ensure_billing_account_v2(
+            organization_id=organization_id,
+            created_by=created_by,
+        )
+    return None
--- a/Show more
+++ b/Show more
 @ -1 +1 @@
 .13.7
 .13.7