Merge pull request #1476 from MODSetter/dev

feat(0.0.27): bug fixes and optimizations
2026-07-26 23:51:14 +02:00 · 2026-06-09 23:10:44 -07:00 · 2026-06-09 23:10:44 -07:00 · 4c29938528
commit 4c29938528
parent 61adc80615 2624392c4a
1169 changed files with 30332 additions and 38144 deletions
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@ -5,6 +5,9 @@ on:
    branches:
      - main
      - dev
+    tags:
+      - 'v*'
+      - 'beta-v*'
    paths:
      - 'surfsense_backend/**'
      - 'surfsense_web/**'
@ -24,11 +27,13 @@ permissions:
  packages: write

 jobs:
-  tag_release:
+  compute_version:
    runs-on: ubuntu-latest
-    if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch'
+    if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/tags/beta-v')
    outputs:
      new_tag: ${{ steps.tag_version.outputs.next_version }}
+      commit_sha: ${{ steps.tag_version.outputs.commit_sha }}
+      is_release_tag: ${{ steps.tag_version.outputs.is_release_tag }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
@ -37,57 +42,65 @@ jobs:
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.GITHUB_TOKEN }}

+      # Compute-only: tag is pushed by finalize_release after everything succeeds.
      - name: Read app version and calculate next Docker build version
        id: tag_version
        run: |
-          APP_VERSION=$(tr -d '[:space:]' < VERSION)
-          echo "App version from VERSION file: $APP_VERSION"
+          if [[ "$GITHUB_REF" == refs/tags/beta-v* ]]; then
+            VERSION="${GITHUB_REF#refs/tags/beta-v}"
+            NEXT_VERSION="beta-${VERSION}"
+            IS_RELEASE_TAG="true"

-          if [ -z "$APP_VERSION" ]; then
-            echo "Error: Could not read version from VERSION file"
-            exit 1
-          fi
+            if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
+              echo "::error::Version '$VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
+              exit 1
+            fi

-          git fetch --tags
+            echo "Docker beta release version from git tag: $NEXT_VERSION"
+          elif [[ "$GITHUB_REF" == refs/tags/v* ]]; then
+            NEXT_VERSION="${GITHUB_REF#refs/tags/v}"
+            IS_RELEASE_TAG="true"

-          LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
+            if ! echo "$NEXT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
+              echo "::error::Version '$NEXT_VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
+              exit 1
+            fi

-          if [ -z "$LATEST_BUILD_TAG" ]; then
-            echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
-            NEXT_VERSION="${APP_VERSION}.1"
+            echo "Docker release version from git tag: $NEXT_VERSION"
          else
-            echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
-            BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
-            NEXT_BUILD=$((BUILD_NUMBER + 1))
-            NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
+            APP_VERSION=$(tr -d '[:space:]' < VERSION)
+            echo "App version from VERSION file: $APP_VERSION"
+
+            if [ -z "$APP_VERSION" ]; then
+              echo "Error: Could not read version from VERSION file"
+              exit 1
+            fi
+
+            git fetch --tags
+
+            LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
+
+            if [ -z "$LATEST_BUILD_TAG" ]; then
+              echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
+              NEXT_VERSION="${APP_VERSION}.1"
+            else
+              echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
+              BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
+              NEXT_BUILD=$((BUILD_NUMBER + 1))
+              NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
+            fi
+
+            IS_RELEASE_TAG="false"
+            echo "Calculated next Docker version: $NEXT_VERSION"
          fi

-          echo "Calculated next Docker version: $NEXT_VERSION"
          echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT
-
-      - name: Create and Push Tag
-        run: |
-          git config --global user.name 'github-actions[bot]'
-          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
-
-          NEXT_TAG="${{ steps.tag_version.outputs.next_version }}"
-          COMMIT_SHA=$(git rev-parse HEAD)
-          echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
-
-          git tag -a "$NEXT_TAG" -m "Docker build $NEXT_TAG"
-          echo "Pushing tag $NEXT_TAG to origin"
-          git push origin "$NEXT_TAG"
-
-      - name: Verify Tag Push
-        run: |
-          echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..."
-          sleep 5
-          git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1)
-          echo "Tag successfully pushed."
+          echo "commit_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
+          echo "is_release_tag=$IS_RELEASE_TAG" >> $GITHUB_OUTPUT

  build:
-    needs: tag_release
-    if: always() && (needs.tag_release.result == 'success' || needs.tag_release.result == 'skipped')
+    needs: compute_version
+    if: always() && (needs.compute_version.result == 'success' || needs.compute_version.result == 'skipped')
    runs-on: ${{ matrix.os }}
    permissions:
      packages: write
@ -97,6 +110,12 @@ jobs:
      matrix:
        platform: [linux/amd64, linux/arm64]
        image: [backend, web]
+        variant: [cpu, cuda, cuda126]
+        exclude:
+          - image: web
+            variant: cuda
+          - image: web
+            variant: cuda126
        include:
          - platform: linux/amd64
            suffix: amd64
@ -114,6 +133,18 @@ jobs:
            context: ./surfsense_web
            file: ./surfsense_web/Dockerfile
            target: runner
+          - variant: cpu
+            tag_suffix: ""
+            use_cuda: "false"
+            cuda_extra: cpu
+          - variant: cuda
+            tag_suffix: "-cuda"
+            use_cuda: "true"
+            cuda_extra: cu128
+          - variant: cuda126
+            tag_suffix: "-cuda126"
+            use_cuda: "true"
+            cuda_extra: cu126
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}

@ -149,7 +180,7 @@ jobs:
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          docker system prune -af

-      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
+      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.variant }}, ${{ matrix.suffix }})
        id: build
        uses: docker/build-push-action@v7
        with:
@ -160,10 +191,14 @@ jobs:
          tags: ${{ steps.image.outputs.name }}
          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
          platforms: ${{ matrix.platform }}
-          cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }}
-          cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }}
+          cache-from: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }}
+          cache-to: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }},mode=max,image-manifest=true,oci-mediatypes=true
+          secrets: |
+            HF_TOKEN=${{ secrets.HF_TOKEN }}
          provenance: false
          build-args: |
+            ${{ matrix.image == 'backend' && format('USE_CUDA={0}', matrix.use_cuda) || '' }}
+            ${{ matrix.image == 'backend' && format('CUDA_EXTRA={0}', matrix.cuda_extra) || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }}
@ -179,15 +214,47 @@ jobs:
      - name: Upload digest
        uses: actions/upload-artifact@v7
        with:
-          name: digests-${{ matrix.image }}-${{ matrix.suffix }}
+          name: digests-${{ matrix.image }}-${{ matrix.variant }}-${{ matrix.suffix }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1

+  # Release gate: require both arches for every variant, else block publishing.
+  # Release-only; skipped on dev so the tolerant create_manifest path is kept.
+  verify_digests:
+    runs-on: ubuntu-latest
+    needs: [compute_version, build]
+    if: ${{ always() && needs.compute_version.result == 'success' && needs.compute_version.outputs.new_tag != '' }}
+    steps:
+      - name: Download all digests
+        uses: actions/download-artifact@v8
+        with:
+          pattern: digests-*
+          path: /tmp/digests
+          merge-multiple: false
+
+      - name: Require both arches for every required variant
+        run: |
+          # Count the digest files found under each "digests-<image>-<variant>-*"
+          # artifact directory. Every target is inspected before the step exits,
+          # so a single run reports all incomplete variants at once.
+          missing=0
+          for target in backend-cpu backend-cuda backend-cuda126 web-cpu; do
+            found=$(find /tmp/digests -type f -path "*/digests-${target}-*/*" 2>/dev/null | wc -l | tr -d ' ')
+            if [ "$found" -ge 2 ]; then
+              echo "OK: $target ($found/2)"
+            else
+              echo "::error::$target has $found/2 arch digests — blocking release"
+              missing=1
+            fi
+          done
+          [ "$missing" -eq 0 ] || exit 1
+
  create_manifest:
    runs-on: ubuntu-latest
-    needs: [tag_release, build]
-    if: always() && needs.build.result == 'success'
+    needs: [compute_version, build, verify_digests]
+    if: ${{ !cancelled() && needs.verify_digests.result != 'failure' }}
    permissions:
      packages: write
      contents: read
@ -197,8 +264,20 @@ jobs:
        include:
          - name: surfsense-backend
            image: backend
+            variant: cpu
+            tag_suffix: ""
+          - name: surfsense-backend
+            image: backend
+            variant: cuda
+            tag_suffix: "-cuda"
+          - name: surfsense-backend
+            image: backend
+            variant: cuda126
+            tag_suffix: "-cuda126"
          - name: surfsense-web
            image: web
+            variant: cpu
+            tag_suffix: ""
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}

@ -207,22 +286,33 @@ jobs:
        id: image
        run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT

-      - name: Download amd64 digest
+      - name: Download digests
+        id: download
        uses: actions/download-artifact@v8
        with:
-          name: digests-${{ matrix.image }}-amd64
+          pattern: digests-${{ matrix.image }}-${{ matrix.variant }}-*
          path: /tmp/digests
+          merge-multiple: true
+        continue-on-error: true

-      - name: Download arm64 digest
-        uses: actions/download-artifact@v8
-        with:
-          name: digests-${{ matrix.image }}-arm64
-          path: /tmp/digests
+      - name: Check digests
+        id: check
+        run: |
+          count=$(find /tmp/digests -type f 2>/dev/null | wc -l | tr -d ' ')
+          echo "digest_count=$count" >> $GITHUB_OUTPUT
+          if [ "$count" -lt 2 ]; then
+            echo "::warning::${{ matrix.variant }}: $count/2 digests, skipping merge"
+            echo "skip=true" >> $GITHUB_OUTPUT
+          else
+            echo "skip=false" >> $GITHUB_OUTPUT
+          fi

      - name: Set up Docker Buildx
+        if: steps.check.outputs.skip != 'true'
        uses: docker/setup-buildx-action@v4

      - name: Login to GitHub Container Registry
+        if: steps.check.outputs.skip != 'true'
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
@ -230,9 +320,10 @@ jobs:
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Compute app version
+        if: steps.check.outputs.skip != 'true'
        id: appver
        run: |
-          VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}"
+          VERSION_TAG="${{ needs.compute_version.outputs.new_tag }}"
          if [ -n "$VERSION_TAG" ]; then
            APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev)
          else
@ -241,29 +332,69 @@ jobs:
          echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT

      - name: Docker meta
+        if: steps.check.outputs.skip != 'true'
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}
          tags: |
-            type=raw,value=${{ needs.tag_release.outputs.new_tag }},enable=${{ needs.tag_release.outputs.new_tag != '' }}
-            type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.tag_release.outputs.new_tag != '' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
+            type=raw,value=${{ needs.compute_version.outputs.new_tag }},enable=${{ needs.compute_version.outputs.new_tag != '' }}
+            type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
            type=ref,event=branch
            type=sha,prefix=git-
          flavor: |
-            latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
+            latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch || startsWith(github.ref, 'refs/tags/v') }}
+            ${{ matrix.tag_suffix != '' && format('suffix={0},onlatest=true', matrix.tag_suffix) || '' }}

      - name: Create manifest list and push
+        if: steps.check.outputs.skip != 'true'
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create \
            $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ steps.image.outputs.name }}@sha256:%s ' *)
      - name: Inspect image
+        if: steps.check.outputs.skip != 'true'
        run: |
          docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}

      - name: Summary
+        if: steps.check.outputs.skip != 'true'
        run: | 
          echo "Multi-arch manifest created for ${{ matrix.name }}!"
          echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")"
+
+  # Push the git tag only after build, gate, and manifest publish all succeed.
+  finalize_release:
+    runs-on: ubuntu-latest
+    needs: [compute_version, create_manifest]
+    if: ${{ success() && needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' }}
+    permissions:
+      contents: write
+    # The computed tag and commit reach the scripts as environment variables
+    # rather than being expanded into the script text, so the shell never
+    # parses their contents as code.
+    env:
+      NEXT_TAG: ${{ needs.compute_version.outputs.new_tag }}
+      COMMIT_SHA: ${{ needs.compute_version.outputs.commit_sha }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.branch }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Create and push git tag
+        run: |
+          git config --global user.name 'github-actions[bot]'
+          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+
+          echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
+
+          # Tag the commit recorded by compute_version, not whatever HEAD is now.
+          git tag -a "$NEXT_TAG" "$COMMIT_SHA" -m "Docker build $NEXT_TAG"
+          echo "Pushing tag $NEXT_TAG to origin"
+          git push origin "$NEXT_TAG"
+
+      - name: Verify tag push
+        run: |
+          echo "Checking if tag $NEXT_TAG exists remotely..."
+          sleep 5
+          # Ask the remote for this exact ref. --exit-code makes git return a
+          # non-zero status when no ref matches, so a longer tag that merely
+          # starts with the same text (e.g. X.Y.Z.10 for X.Y.Z.1) cannot pass.
+          git ls-remote --exit-code --tags origin "refs/tags/$NEXT_TAG" || (echo "Tag push verification failed!" && exit 1)
+          echo "Tag successfully pushed."
--- a/2
+++ b/2
@ -1 +1 @@
-0.0.26
+0.0.27
--- a/docker/.env.example
+++ b/docker/.env.example
@ -7,6 +7,16 @@
 # SurfSense version (use "latest" or a specific version like "0.0.14")
 SURFSENSE_VERSION=latest

+# Image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
+# GPU acceleration also requires the NVIDIA Container Toolkit on the host and
+# the GPU overlay in COMPOSE_FILE. Linux/macOS use ":"; Windows uses ";".
+# Example Linux/macOS: COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
+# Example Windows:     COMPOSE_FILE=docker-compose.yml;docker-compose.gpu.yml
+# Use "cuda126" for older NVIDIA driver stacks; use "cuda" for newer drivers.
+SURFSENSE_VARIANT=
+# COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
+# SURFSENSE_GPU_COUNT=1
+
 # Deployment environment: dev or production
 SURFSENSE_ENV=production

@ -55,6 +65,9 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # -- Redis exposed port (dev only; Redis is internal-only in prod) --
 # REDIS_PORT=6379

+# -- WhatsApp bridge exposed port (dev/hybrid only; prod keeps it Docker-internal) --
+# WHATSAPP_BRIDGE_PORT=9929
+
 # -- Frontend Build Args --
 # In dev, the frontend is built from source and these are passed as build args.
 # In prod, they are automatically derived from AUTH_TYPE, ETL_SERVICE, and the port settings above.
@ -67,7 +80,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # ------------------------------------------------------------------------------
 # ONLY set these if you are serving SurfSense on a real domain via a reverse
 # proxy (e.g. Caddy, Nginx, Cloudflare Tunnel).
-# For standard localhost deployments, leave all of these commented out —
+# For standard localhost deployments, leave all of these commented out.
 # they are automatically derived from the port settings above.
 #
 # NEXT_FRONTEND_URL=https://app.yourdomain.com
@ -89,7 +102,11 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # Only change this if you manage publications manually.
 # ZERO_APP_PUBLICATIONS=zero_publication

-# Sync worker tuning — zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
+# Keep Zero's documented halt safety net enabled. If replication halts, Zero
+# can wipe and re-sync its local SQLite replica without touching Postgres.
+# ZERO_AUTO_RESET=true
+
+# Sync worker tuning. zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
 # of CPU cores, which can exceed the connection pool limits on high-core machines.
 # Each sync worker needs at least 1 connection from both the UPSTREAM and CVR
 # pools, so these constraints must hold:
@ -134,7 +151,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # SSL mode for database connections: disable, require, verify-ca, verify-full
 # DB_SSLMODE=disable

-# Full DATABASE_URL override — when set, takes precedence over the individual
+# Full DATABASE_URL override. When set, this takes precedence over the individual
 # DB_USER / DB_PASSWORD / DB_NAME / DB_HOST / DB_PORT settings above.
 # Use this for managed databases (AWS RDS, GCP Cloud SQL, Supabase, etc.)
 # DATABASE_URL=postgresql+asyncpg://user:password@your-rds-host:5432/surfsense?sslmode=require
@ -149,7 +166,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # REDIS_URL=redis://redis:6379/0

 # ------------------------------------------------------------------------------
-# Stripe (pay-as-you-go page packs — disabled by default)
+# Stripe (pay-as-you-go page packs, disabled by default)
 # ------------------------------------------------------------------------------

 # Set TRUE to allow users to buy additional page packs via Stripe Checkout
@ -168,7 +185,6 @@ STRIPE_PAGE_BUYING_ENABLED=FALSE
 # STRIPE_TOKEN_BUYING_ENABLED=FALSE
 # STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
 # STRIPE_CREDIT_MICROS_PER_UNIT=1000000
-# DEPRECATED — STRIPE_TOKENS_PER_UNIT=1000000

 # ------------------------------------------------------------------------------
 # TTS & STT (Text-to-Speech / Speech-to-Text)
@ -263,7 +279,44 @@ STT_SERVICE=local/base
 # COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback

 # ------------------------------------------------------------------------------
-# SearXNG (bundled web search — works out of the box, no config needed)
+# Messaging Channels (optional)
+# ------------------------------------------------------------------------------
+# Configure only the external chat channels you want to use.
+
+# -- Telegram --
+# TELEGRAM_SHARED_BOT_TOKEN=
+# TELEGRAM_SHARED_BOT_USERNAME=
+# TELEGRAM_WEBHOOK_SECRET=
+# GATEWAY_BASE_URL=http://localhost:8929
+# GATEWAY_TELEGRAM_INTAKE_MODE=webhook
+
+# -- WhatsApp --
+# GATEWAY_WHATSAPP_INTAKE_MODE=disabled
+# WHATSAPP_SHARED_BUSINESS_TOKEN=
+# WHATSAPP_SHARED_PHONE_NUMBER_ID=
+# WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
+# WHATSAPP_SHARED_WABA_ID=
+# WHATSAPP_GRAPH_API_VERSION=v25.0
+# WHATSAPP_WEBHOOK_VERIFY_TOKEN=
+# WHATSAPP_WEBHOOK_APP_SECRET=
+# WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
+
+# -- Slack --
+# Uses SLACK_CLIENT_ID and SLACK_CLIENT_SECRET from the Slack connector section.
+#
+# GATEWAY_SLACK_ENABLED=FALSE
+# GATEWAY_SLACK_SIGNING_SECRET=
+# GATEWAY_SLACK_REDIRECT_URI=http://localhost:8929/api/v1/gateway/slack/callback
+
+# -- Discord --
+# Uses DISCORD_CLIENT_ID, DISCORD_CLIENT_SECRET, and DISCORD_BOT_TOKEN from the
+# Discord connector section.
+#
+# GATEWAY_DISCORD_ENABLED=FALSE
+# GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8929/api/v1/gateway/discord/callback
+
+# ------------------------------------------------------------------------------
+# SearXNG (bundled web search, works out of the box with no config needed)
 # ------------------------------------------------------------------------------
 # SearXNG provides web search to all search spaces automatically.
 # To access the SearXNG UI directly: http://localhost:8888
@ -273,7 +326,7 @@ STT_SERVICE=local/base
 # SEARXNG_SECRET=surfsense-searxng-secret

 # ------------------------------------------------------------------------------
-# Daytona Sandbox (optional — cloud code execution for the deep agent)
+# Daytona Sandbox (optional cloud code execution for the deep agent)
 # ------------------------------------------------------------------------------
 # Set DAYTONA_SANDBOX_ENABLED=TRUE and provide credentials to give the agent
 # an isolated code execution environment via the Daytona cloud API.
@ -286,9 +339,6 @@ STT_SERVICE=local/base
 # External API Keys (optional)
 # ------------------------------------------------------------------------------

-# Firecrawl (web scraping)
-# FIRECRAWL_API_KEY=
-
 # Unstructured (if ETL_SERVICE=UNSTRUCTURED)
 # UNSTRUCTURED_API_KEY=

@ -364,7 +414,6 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
 # Premium turns are debited at the actual per-call provider cost reported
 # by LiteLLM. Only applies to models with billing_tier=premium.
 # PREMIUM_CREDIT_MICROS_LIMIT=5000000
-# DEPRECATED — PREMIUM_TOKEN_LIMIT=5000000

 # Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default).
 # QUOTA_MAX_RESERVE_MICROS=1000000
@ -376,10 +425,10 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
 # QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000

 # Per-video-presentation reservation for the video Celery task ($1.00 default).
-# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
+# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp. Raise with care.
 # QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000

-# No-login (anonymous) mode — public users can chat without an account
+# No-login (anonymous) mode. Public users can chat without an account
 # Set TRUE to enable /free pages and anonymous chat API
 NOLOGIN_MODE_ENABLED=FALSE
 # ANON_TOKEN_LIMIT=1000000
--- a/docker/docker-compose.deps-only.yml
+++ b/docker/docker-compose.deps-only.yml
@ -114,6 +114,7 @@ services:
      - ZERO_REPLICA_FILE=/data/zero.db
      - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
+      - ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
      - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
      - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
      - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -122,11 +123,30 @@ services:
    volumes:
      - zero_cache_data:/data
    restart: unless-stopped
+    stop_grace_period: 300s
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
      interval: 10s
      timeout: 5s
      retries: 5
+      start_period: 600s
+
+  # OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
+  # original-file backend. The default filesystem backend needs none of this.
+  # To exercise it, set in surfsense_backend/.env:
+  #   FILE_STORAGE_BACKEND=azure
+  #   AZURE_STORAGE_CONTAINER=surfsense-documents
+  #   AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:${AZURITE_BLOB_PORT:-10000}/devstoreaccount1;
+  # The backend creates blobs on upload; create the container once first
+  # (Azure CLI / Storage Explorer), then upload a document.
+  azurite:
+    image: mcr.microsoft.com/azure-storage/azurite:3.33.0
+    # --location points Azurite's workspace at the mounted volume. Without it
+    # the emulator stores blobs in its working directory, so azurite_data
+    # would stay empty and data would be lost when the container is recreated.
+    command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --location /data
+    ports:
+      - "${AZURITE_BLOB_PORT:-10000}:10000"
+    volumes:
+      - azurite_data:/data
+    restart: unless-stopped

 volumes:
  postgres_data:
@ -137,3 +157,5 @@ volumes:
    name: surfsense-deps-redis
  zero_cache_data:
    name: surfsense-deps-zero-cache
+  azurite_data:
+    name: surfsense-deps-azurite
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@ -46,8 +46,6 @@ services:
      - PYTHONPATH=/app
      - SERVICE_ROLE=migrate
      - MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900}
-    volumes:
-      - zero_init:/zero-init
    depends_on:
      db:
        condition: service_healthy
@ -126,6 +124,7 @@ services:
      - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
      - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
      - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
+      - WHATSAPP_BRIDGE_URL=${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
      # Daytona Sandbox – uncomment and set credentials to enable cloud code execution
      # - DAYTONA_SANDBOX_ENABLED=TRUE
      # - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
@ -148,6 +147,25 @@ services:
      retries: 30
      start_period: 200s

+  # WhatsApp bridge, built from surfsense_backend/scripts/whatsapp-bridge.
+  # It belongs to the `whatsapp` profile, and its port is published on the
+  # host's loopback address only.
+  whatsapp-bridge:
+    build:
+      context: ../surfsense_backend/scripts/whatsapp-bridge
+    profiles: [whatsapp]
+    restart: unless-stopped
+    environment:
+      PORT: "9929"
+      WHATSAPP_MODE: ${WHATSAPP_MODE:-self-chat}
+      WHATSAPP_SESSION_DIR: /data/sessions
+    ports:
+      - "127.0.0.1:${WHATSAPP_BRIDGE_PORT:-9929}:9929"
+    volumes:
+      - whatsapp_sessions:/data/sessions
+    healthcheck:
+      test:
+        - "CMD"
+        - "wget"
+        - "-qO-"
+        - "http://localhost:9929/health"
+      interval: 30s
+      timeout: 5s
+      retries: 5
+
  celery_worker:
    build: *backend-build
    volumes:
@ -197,21 +215,6 @@ services:
      celery_worker:
        condition: service_started

-  # flower:
-  #   build: *backend-build
-  #   ports:
-  #     - "${FLOWER_PORT:-5555}:5555"
-  #   env_file:
-  #     - ../surfsense_backend/.env
-  #   environment:
-  #     - CELERY_BROKER_URL=${REDIS_URL:-redis://redis:6379/0}
-  #     - CELERY_RESULT_BACKEND=${REDIS_URL:-redis://redis:6379/0}
-  #     - PYTHONPATH=/app
-  #   command: celery -A app.celery_app flower --port=5555
-  #   depends_on:
-  #     - redis
-  #     - celery_worker
-
  zero-cache:
    image: rocicorp/zero:1.4.0
    ports:
@ -230,6 +233,7 @@ services:
      - ZERO_REPLICA_FILE=/data/zero.db
      - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
+      - ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
      - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
      - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
      - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -237,18 +241,14 @@ services:
      - ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
    volumes:
      - zero_cache_data:/data
-      - zero_init:/zero-init
-    # Wrapper: see docker/docker-compose.yml `zero-cache` for rationale.
-    entrypoint: ["sh", "-c"]
-    # Pass the script as a single list element so Compose does not tokenize it.
-    command:
-      - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
    restart: unless-stopped
+    stop_grace_period: 300s
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
      interval: 10s
      timeout: 5s
      retries: 5
+      start_period: 600s

  frontend:
    build:
@ -280,5 +280,5 @@ volumes:
    name: surfsense-dev-shared-temp
  zero_cache_data:
    name: surfsense-dev-zero-cache
-  zero_init:
-    name: surfsense-dev-zero-init
+  whatsapp_sessions:
+    name: surfsense-dev-whatsapp-sessions
--- a/docker/docker-compose.gpu.yml
+++ b/docker/docker-compose.gpu.yml
@ -0,0 +1,30 @@
+# GPU overlay: adds a device reservation to the backend, celery_worker and
+# celery_beat services. The reservation is written once as an extension field
+# and shared through a YAML anchor.
+x-gpu-deploy: &gpu_deploy
+  resources:
+    reservations:
+      devices:
+        - capabilities: [gpu]
+          driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
+          count: ${SURFSENSE_GPU_COUNT:-1}
+
+services:
+  backend:
+    deploy: *gpu_deploy
+
+  celery_worker:
+    deploy: *gpu_deploy
+
+  celery_beat:
+    deploy: *gpu_deploy
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@ -29,12 +29,11 @@ services:

  # Short-lived schema runner. Executes `alembic upgrade head` and verifies
  # that the `zero_publication` Postgres logical-replication publication
-  # exists, then exits 0. Downstream services (backend, celery_*, zero-cache)
-  # gate on this with `condition: service_completed_successfully` so a failed
-  # migration halts the whole stack instead of silently producing a half-built
-  # system that crash-loops zero-cache on missing publications.
+  # matches the canonical shape, then exits 0. Downstream services gate on this
+  # with `condition: service_completed_successfully` so a failed migration halts
+  # the whole stack instead of booting zero-cache against a drifted publication.
  migrations:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    env_file:
      - .env
    environment:
@ -42,8 +41,6 @@ services:
      PYTHONPATH: /app
      SERVICE_ROLE: migrate
      MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900}
-    volumes:
-      - zero_init:/zero-init
    depends_on:
      db:
        condition: service_healthy
@ -61,28 +58,28 @@ services:
      timeout: 5s
      retries: 5

-  otel-collector:
-    image: otel/opentelemetry-collector-contrib:0.152.1
-    profiles:
-      - observability
-    command: ["--config=/etc/otelcol/config.yaml"]
-    volumes:
-      - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
-    environment:
-      GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
-      GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
-      GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
-    ports:
-      - "${OTEL_GRPC_PORT:-4317}:4317"
-      - "${OTEL_HTTP_PORT:-4318}:4318"
-      - "${OTEL_HEALTH_PORT:-13133}:13133"
-    mem_limit: 2g
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "/otelcol-contrib", "--version"]
-      interval: 30s
-      timeout: 5s
-      retries: 3
+  # otel-collector:
+  #   image: otel/opentelemetry-collector-contrib:0.152.1
+  #   profiles:
+  #     - observability
+  #   command: ["--config=/etc/otelcol/config.yaml"]
+  #   volumes:
+  #     - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
+  #   environment:
+  #     GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
+  #     GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
+  #     GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
+  #   ports:
+  #     - "${OTEL_GRPC_PORT:-4317}:4317"
+  #     - "${OTEL_HTTP_PORT:-4318}:4318"
+  #     - "${OTEL_HEALTH_PORT:-13133}:13133"
+  #   mem_limit: 2g
+  #   restart: unless-stopped
+  #   healthcheck:
+  #     test: ["CMD", "/otelcol-contrib", "--version"]
+  #     interval: 30s
+  #     timeout: 5s
+  #     retries: 3

  searxng:
    image: searxng/searxng:2026.3.13-3c1f68c59
@ -98,7 +95,7 @@ services:
      retries: 5

  backend:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    ports:
      - "${BACKEND_PORT:-8929}:8000"
    volumes:
@ -118,6 +115,7 @@ services:
      UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
      NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
      SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
+      WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
      # Daytona Sandbox – uncomment and set credentials to enable cloud code execution
      # DAYTONA_SANDBOX_ENABLED: "TRUE"
      # DAYTONA_API_KEY: ${DAYTONA_API_KEY:-}
@ -143,8 +141,28 @@ services:
      retries: 30
      start_period: 200s

+  # whatsapp-bridge:
+  #   build: ../surfsense_backend/scripts/whatsapp-bridge
+  #   profiles:
+  #     - whatsapp
+  #   expose:
+  #     - "9929"
+  #   volumes:
+  #     - whatsapp_sessions:/data/sessions
+  #   environment:
+  #     PORT: 9929
+  #     WHATSAPP_MODE: ${WHATSAPP_MODE:-self-chat}
+  #     WHATSAPP_SESSION_DIR: /data/sessions
+  #   mem_limit: 512m
+  #   restart: unless-stopped
+  #   healthcheck:
+  #     test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
+  #     interval: 30s
+  #     timeout: 5s
+  #     retries: 5
+
  celery_worker:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    volumes:
      - shared_temp:/shared_tmp
    env_file:
@ -174,7 +192,7 @@ services:
    restart: unless-stopped

  celery_beat:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    env_file:
      - .env
    environment:
@ -197,22 +215,6 @@ services:
      - "com.centurylinklabs.watchtower.enable=true"
    restart: unless-stopped

-  # flower:
-  #   image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
-  #   ports:
-  #     - "${FLOWER_PORT:-5555}:5555"
-  #   env_file:
-  #     - .env
-  #   environment:
-  #     CELERY_BROKER_URL: ${REDIS_URL:-redis://redis:6379/0}
-  #     CELERY_RESULT_BACKEND: ${REDIS_URL:-redis://redis:6379/0}
-  #     PYTHONPATH: /app
-  #   command: celery -A app.celery_app flower --port=5555
-  #   depends_on:
-  #     - redis
-  #     - celery_worker
-  #   restart: unless-stopped
-
  zero-cache:
    image: rocicorp/zero:1.4.0
    ports:
@ -226,6 +228,7 @@ services:
      ZERO_REPLICA_FILE: /data/zero.db
      ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication}
+      ZERO_AUTO_RESET: ${ZERO_AUTO_RESET:-true}
      ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4}
      ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20}
      ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30}
@ -233,16 +236,8 @@ services:
      ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
    volumes:
      - zero_cache_data:/data
-      - zero_init:/zero-init
-    # Wrapper: if the migrations service flagged a publication change via
-    # /zero-init/needs_reset, wipe the SQLite replica before starting so
-    # zero-cache does a clean initial sync. Recovers from the half-built
-    # replica state (`_zero.tableMetadata` missing) caused by earlier crashes.
-    entrypoint: ["sh", "-c"]
-    # Pass the script as a single list element so Compose does not tokenize it.
-    command:
-      - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
    restart: unless-stopped
+    stop_grace_period: 300s
    depends_on:
      db:
        condition: service_healthy
@ -253,6 +248,7 @@ services:
      interval: 10s
      timeout: 5s
      retries: 5
+      start_period: 600s

  frontend:
    image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest}
@ -264,6 +260,7 @@ services:
      NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL}
      NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING}
      NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted}
+      NEXT_PUBLIC_WHATSAPP_DISPLAY_PHONE_NUMBER: ${WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER:-}
      FASTAPI_BACKEND_INTERNAL_URL: ${FASTAPI_BACKEND_INTERNAL_URL:-http://backend:8000}
    labels:
      - "com.centurylinklabs.watchtower.enable=true"
@ -283,5 +280,5 @@ volumes:
    name: surfsense-shared-temp
  zero_cache_data:
    name: surfsense-zero-cache
-  zero_init:
-    name: surfsense-zero-init
+  whatsapp_sessions:
+    name: surfsense-whatsapp-sessions
--- a/docker/scripts/install.ps1
+++ b/docker/scripts/install.ps1
@ -7,6 +7,8 @@
 # To pass flags, save and run locally:
 #   .\install.ps1 -NoWatchtower
 #   .\install.ps1 -WatchtowerInterval 3600
+#   .\install.ps1 -Variant cuda
+#   .\install.ps1 -Variant cuda -GpuCount all
 #
 # Handles two cases automatically:
 #   1. Fresh install        — no prior SurfSense data detected
@ -17,7 +19,11 @@

 param(
    [switch]$NoWatchtower,
-    [int]$WatchtowerInterval = 86400
+    [int]$WatchtowerInterval = 86400,
+    [ValidateSet("cpu", "cuda", "cuda126")]
+    [string]$Variant,
+    [string]$GpuCount,
+    [switch]$Quiet
 )

 $ErrorActionPreference = 'Stop'
@ -34,6 +40,11 @@ $MigrationMode      = $false
 $SetupWatchtower    = -not $NoWatchtower
 $WatchtowerContainer = "watchtower"

+if ($GpuCount -and $GpuCount -notmatch '^([0-9]+|all)$') {
+    Write-Host "[SurfSense] ERROR: Invalid -GpuCount '$GpuCount'. Use a number or 'all'." -ForegroundColor Red
+    exit 1
+}
+
 # ── Output helpers ──────────────────────────────────────────────────────────

 function Write-Info    { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg }
@ -42,6 +53,27 @@ function Write-Warn    { param([string]$Msg) Write-Host "[SurfSense] " -Foregrou
 function Write-Step    { param([string]$Msg) Write-Host "`n-- $Msg" -ForegroundColor Cyan }
 function Write-Err     { param([string]$Msg) Write-Host "[SurfSense] ERROR: $Msg" -ForegroundColor Red; exit 1 }

+function Show-Banner {  # Prints the SurfSense ASCII-art banner, tagline and a one-line summary of what the installer does.
+    Write-Host ""
+    Write-Host @"
+
+
+███████╗██╗   ██╗██████╗ ███████╗███████╗███████╗███╗   ██╗███████╗███████╗
+██╔════╝██║   ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗  ██║██╔════╝██╔════╝
+███████╗██║   ██║██████╔╝█████╗  ███████╗█████╗  ██╔██╗ ██║███████╗█████╗  
+╚════██║██║   ██║██╔══██╗██╔══╝  ╚════██║██╔══╝  ██║╚██╗██║╚════██║██╔══╝  
+███████║╚██████╔╝██║  ██║██║     ███████║███████╗██║ ╚████║███████║███████╗
+╚══════╝ ╚═════╝ ╚═╝  ╚═╝╚═╝     ╚══════╝╚══════╝╚═╝  ╚═══╝╚══════╝╚══════╝
+                                                                           
+
+"@ -ForegroundColor White
+    Write-Host "         OSS Alternative to NotebookLM for Teams" -ForegroundColor Yellow
+    Write-Host ("=" * 62) -ForegroundColor Cyan  # horizontal rule of 62 '=' characters
+    Write-Info "This installer will create $InstallDir\ and start SurfSense with Docker Compose."
+}
+
+Show-Banner
+
 function Invoke-NativeSafe {
    param([scriptblock]$Command)
    $previousErrorActionPreference = $ErrorActionPreference
@ -53,6 +85,28 @@ function Invoke-NativeSafe {
    }
 }

+function Resolve-WatchtowerPreference {
+    # Asks once whether to enable Watchtower and stores the answer in $script:SetupWatchtower.
+    if ($NoWatchtower -or $Quiet -or -not [Environment]::UserInteractive) {
+        return
+    }
+
+    Write-Host ""
+    Write-Host "Automatic updates" -ForegroundColor Cyan
+    $choice = Read-Host "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]"
+
+    if ($choice -eq "" -or $choice -match '^(?i)y(es)?$') {
+        $script:SetupWatchtower = $true
+    } elseif ($choice -match '^(?i)n(o)?$') {
+        $script:SetupWatchtower = $false
+    } else {
+        Write-Warn "Unrecognized choice '$choice'; enabling Watchtower by default. Use -NoWatchtower to skip it."
+        $script:SetupWatchtower = $true
+    }
+}
+
+Resolve-WatchtowerPreference
+
 # ── Pre-flight checks ──────────────────────────────────────────────────────

 Write-Step "Checking prerequisites"
@ -97,143 +151,11 @@ function Wait-ForPostgres {
    Write-Ok "PostgreSQL is ready."
 }

-# ── Stack health helpers ────────────────────────────────────────────────────
-
-function Get-ComposeServices {
-    Push-Location $InstallDir
-    try {
-        $raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null }
-    } finally {
-        Pop-Location
-    }
-    if ([string]::IsNullOrWhiteSpace($raw)) { return @() }
-
-    # Compose v2.21+ emits a JSON array; older versions emit one object per line.
-    try {
-        $parsed = $raw | ConvertFrom-Json
-        if ($parsed -is [System.Collections.IEnumerable] -and -not ($parsed -is [string])) {
-            return @($parsed)
-        }
-        return @($parsed)
-    } catch {
-        $services = @()
-        foreach ($line in ($raw -split "`r?`n")) {
-            $line = $line.Trim()
-            if (-not $line) { continue }
-            try { $services += ($line | ConvertFrom-Json) } catch { }
-        }
-        return $services
-    }
-}
-
-function Wait-StackHealthy {
-    param([int]$TimeoutSec = 300)
-
-    $deadline = (Get-Date).AddSeconds($TimeoutSec)
-    $lastReport = ""
-
-    while ((Get-Date) -lt $deadline) {
-        $services = Get-ComposeServices
-        if (-not $services -or $services.Count -eq 0) {
-            Start-Sleep -Seconds 3
-            continue
-        }
-
-        $bad = @()
-        $waiting = @()
-        $good = @()
-
-        foreach ($svc in $services) {
-            $name = $svc.Service
-            $state = $svc.State
-            $health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' }
-            $exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null }
-
-            if ($name -eq 'migrations') {
-                if ($state -eq 'exited' -and $exit -eq 0) { $good += $name }
-                elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" }
-                else { $waiting += "${name} (${state})" }
-                continue
-            }
-
-            if ($state -eq 'running') {
-                if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') {
-                    $good += $name
-                } elseif ($health -eq 'starting') {
-                    $waiting += "${name} (starting)"
-                } elseif ($health -eq 'unhealthy') {
-                    $bad += "${name} (unhealthy)"
-                } else {
-                    $waiting += "${name} (${health})"
-                }
-            } elseif ($state -eq 'restarting') {
-                $bad += "${name} (restarting)"
-            } elseif ($state -eq 'exited') {
-                $bad += "${name} (exited, code=${exit})"
-            } else {
-                $waiting += "${name} (${state})"
-            }
-        }
-
-        if ($bad.Count -gt 0) {
-            return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good }
-        }
-        if ($waiting.Count -eq 0) {
-            return @{ Ok = $true; Reason = 'all_healthy'; Good = $good }
-        }
-
-        $report = "Waiting on: " + ($waiting -join ', ')
-        if ($report -ne $lastReport) {
-            Write-Info $report
-            $lastReport = $report
-        }
-        Start-Sleep -Seconds 5
-    }
-
-    return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good }
-}
-
-function Test-StaleZeroCacheVolume {
-    $raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
-    if ([string]::IsNullOrWhiteSpace($raw)) { return $false }
-    $names = $raw -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }
-    $hasZeroCache = $names -contains 'surfsense-zero-cache'
-    $hasZeroInit = $names -contains 'surfsense-zero-init'
-    # Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init.
-    # Such a volume may hold a half-initialized SQLite replica from an earlier
-    # crash-loop. Wiping it forces zero-cache to do a fresh initial sync.
-    return ($hasZeroCache -and -not $hasZeroInit)
-}
-
-function Invoke-StaleZeroCacheCleanup {
-    if (-not (Test-StaleZeroCacheVolume)) { return }
-
-    Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
-    Write-Warn "predates the migrations-service fix. It may contain a half-initialized"
-    Write-Warn "SQLite replica that would block zero-cache from starting."
-    Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
-    Start-Sleep -Seconds 5
-
-    Push-Location $InstallDir
-    Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null
-    Pop-Location
-    Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null
-    Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
-}
-
-function Write-Err-NoExit {
-    param([string]$Message)
-    Write-Host "[ERROR] $Message" -ForegroundColor Red
-}
+# ── Stack startup helper ────────────────────────────────────────────────────

 function Invoke-StackFailureReport {
-    param([hashtable]$Result)
-
    Write-Host ""
-    Write-Err-NoExit "Stack did not reach a healthy state."
-    if ($Result.Bad.Count -gt 0) { Write-Host ("  Failed: " + ($Result.Bad -join ', ')) }
-    if ($Result.Waiting.Count -gt 0) { Write-Host ("  Stuck:  " + ($Result.Waiting -join ', ')) }
-
+    Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
    Write-Host ""
    Write-Info "Recent logs from migrations / zero-cache / backend:"
    Push-Location $InstallDir
@ -247,11 +169,151 @@ function Invoke-StackFailureReport {
    Write-Host "Recovery hints:" -ForegroundColor Yellow
    Write-Host "  1. Inspect migrations:   cd $InstallDir; docker compose logs migrations"
    Write-Host "  2. Verify publication:   cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
-    Write-Host "  3. Hard reset zero db:   cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d"
+    Write-Host "  3. Hard reset zero db:   cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d --wait"
    Write-Host ""
    exit 1
 }

+function Invoke-ComposeUpWait {  # Runs `docker compose up -d --wait` from $InstallDir and reports a failure if it exits non-zero.
+    Push-Location $InstallDir
+    try {
+        Invoke-NativeSafe { docker compose up -d --wait }
+    } finally {
+        Pop-Location  # restore the caller's location even if the command throws
+    }
+    if ($LASTEXITCODE -ne 0) {
+        Invoke-StackFailureReport  # prints recent logs and recovery hints, then exits with code 1
+    }
+}
+
+# ── Variant and .env helpers ────────────────────────────────────────────────
+
+function Set-EnvValue {
+    # Upsert "$Key=$Value" in the dotenv file at $Path (replace matching lines, else append).
+    param([string]$Path, [string]$Key, [string]$Value)
+    $lines = @(if (Test-Path $Path) { Get-Content $Path })
+    $updated = $false
+    # @(...) keeps the result an array even when the file has a single line;
+    # without it, += below would concatenate onto that one string.
+    $newLines = @(foreach ($line in $lines) {
+        if ($line -match "^$([regex]::Escape($Key))=") {
+            $updated = $true
+            "$Key=$Value"
+        } else {
+            $line
+        }
+    })
+    if (-not $updated) {
+        $newLines += "$Key=$Value"
+    }
+    Set-Content -Path $Path -Value $newLines
+}
+
+function Remove-EnvValue {  # Drops every line starting with "$Key=" from the dotenv file at $Path.
+    param([string]$Path, [string]$Key)
+    if (-not (Test-Path $Path)) { return }  # nothing to remove from a missing file
+    $keyPattern = "^$([regex]::Escape($Key))="
+    Set-Content -Path $Path -Value (Get-Content $Path | Where-Object { $_ -notmatch $keyPattern })
+}
+
+function Test-NvidiaGpu {  # Returns $true when nvidia-smi is on PATH and exits with code 0.
+    if (-not (Get-Command nvidia-smi -ErrorAction SilentlyContinue)) { return $false }
+    Invoke-NativeSafe { nvidia-smi *>$null } | Out-Null  # only the exit code matters; all output is discarded
+    return ($LASTEXITCODE -eq 0)
+}
+
+function Test-NvidiaRuntime {
+    # True when `docker info` mentions nvidia, or an NVIDIA container CLI is on PATH.
+    $dockerInfo = Invoke-NativeSafe { docker info 2>$null }
+    if ($dockerInfo -match 'nvidia') { return $true }
+    $toolkitCli = Get-Command nvidia-ctk, nvidia-container-runtime -ErrorAction SilentlyContinue
+    return [bool]$toolkitCli
+}
+
+function Get-RecommendedVariant {
+    # Picks the CUDA image variant from the major driver version nvidia-smi reports first.
+    $driverVersion = Invoke-NativeSafe { nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>$null } |
+        Select-Object -First 1
+    $driverMajor = if ($driverVersion -match '^(\d+)') { [int]$Matches[1] } else { 0 }
+    # A known driver older than 570 gets cuda126; unknown (0) or newer gets cuda.
+    if ($driverMajor -gt 0 -and $driverMajor -lt 570) {
+        return "cuda126"
+    }
+    return "cuda"
+}
+
+function Resolve-Variant {  # Decides the backend image variant; returns "cpu", the requested -Variant, or the recommended CUDA variant.
+    $hasGpu = Test-NvidiaGpu
+    $hasRuntime = $false
+    $recommended = "cpu"
+
+    if ($hasGpu) {  # driver and runtime probes only run when a GPU was detected
+        $recommended = Get-RecommendedVariant
+        $hasRuntime = Test-NvidiaRuntime
+    }
+
+    if ($Variant) {  # explicit -Variant: honor it, but fall back to cpu when GPU or runtime is missing
+        if ($Variant -eq "cpu") { return "cpu" }
+        if (-not $hasGpu) {
+            Write-Warn "No NVIDIA GPU detected; falling back to CPU variant."
+            return "cpu"
+        }
+        if (-not $hasRuntime) {
+            Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant."
+            Write-Warn "Install the toolkit before enabling SurfSense GPU acceleration."
+            return "cpu"
+        }
+        return $Variant
+    }
+
+    if ($hasGpu -and -not $hasRuntime) {
+        Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant."
+    }
+
+    if ($hasGpu -and $hasRuntime -and -not $Quiet -and [Environment]::UserInteractive) {  # prompt only when someone can answer
+        Write-Host ""
+        Write-Host "SurfSense detected an NVIDIA GPU." -ForegroundColor Cyan
+        $choice = Read-Host "Use GPU acceleration? [Y/n]"
+        switch ($choice) {
+            "" { return $recommended }
+            { $_ -match '^(?i)y(es)?$' } { return $recommended }
+            { $_ -match '^(?i)n(o)?$' } { return "cpu" }
+            default {
+                Write-Warn "Unrecognized choice '$choice'; using CPU variant."
+                return "cpu"
+            }
+        }
+    }
+
+    return "cpu"  # no usable GPU setup, or a quiet/non-interactive run without -Variant
+}
+
+function Set-VariantEnv {
+    param([string]$Path, [string]$SelectedVariant, [bool]$AllowExistingUpdate)
+    # Writes the image variant and GPU overlay settings to the .env at $Path; an existing file is only touched when $AllowExistingUpdate is set.
+    if ((Test-Path $Path) -and -not $AllowExistingUpdate) {
+        Write-Warn ".env already exists - keeping your existing configuration."
+        Write-Info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in $Path, then run docker compose up -d --wait."
+        return
+    }
+    # cpu: blank the variant and drop the GPU overlay keys; otherwise select the variant and add the overlay file.
+    if ($SelectedVariant -eq "cpu") {
+        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value ""
+        Remove-EnvValue -Path $Path -Key "COMPOSE_FILE"
+        Remove-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT"
+    } else {
+        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value $SelectedVariant
+        Set-EnvValue -Path $Path -Key "COMPOSE_FILE" -Value ("docker-compose.yml", "docker-compose.gpu.yml" -join [IO.Path]::PathSeparator)  # ';' on Windows, ':' elsewhere - the separator Compose expects per OS
+        if ($GpuCount) {
+            Set-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT" -Value $GpuCount
+        }
+    }
+
+    Remove-EnvValue -Path $Path -Key "COMPOSE_PROFILES"
+}
+
+$SelectedVariant = Resolve-Variant
+
 # ── Download files ──────────────────────────────────────────────────────────

 Write-Step "Downloading SurfSense files"
@ -262,6 +324,7 @@ New-Item -ItemType Directory -Path "$InstallDir\searxng" -Force | Out-Null

 $Files = @(
    @{ Src = "docker/docker-compose.yml";                Dest = "docker-compose.yml" }
+    @{ Src = "docker/docker-compose.gpu.yml";            Dest = "docker-compose.gpu.yml" }
    @{ Src = "docker/.env.example";                      Dest = ".env.example" }
    @{ Src = "docker/postgresql.conf";                   Dest = "postgresql.conf" }
    @{ Src = "docker/scripts/migrate-database.ps1";      Dest = "scripts/migrate-database.ps1" }
@ -339,15 +402,19 @@ if (-not (Test-Path $envPath)) {
    $content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey"
    Set-Content -Path $envPath -Value $content -NoNewline

+    Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true  # .env was created just above, so $false would always hit the "already exists" early return and never write the variant
    Write-Info "Created $envPath"
 } else {
-    Write-Warn ".env already exists - keeping your existing configuration."
+    if ($PSBoundParameters.ContainsKey('Variant')) {
+        Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
+        Write-Info "Updated SurfSense image variant in existing $envPath"
+    } else {
+        Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
+    }
 }

 # ── Start containers ────────────────────────────────────────────────────────

-Invoke-StaleZeroCacheCleanup
-
 if ($MigrationMode) {
    $envContent = Get-Content $envPath
    $DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
@ -405,31 +472,15 @@ if ($MigrationMode) {
    }

    Write-Step "Starting all SurfSense services"
-    Push-Location $InstallDir
-    Invoke-NativeSafe { docker compose up -d }
-    Pop-Location
-    Write-Ok "All containers started; waiting for stack to become healthy..."
-
-    $waitResult = Wait-StackHealthy -TimeoutSec 300
-    if (-not $waitResult.Ok) {
-        Invoke-StackFailureReport -Result $waitResult
-    }
-    Write-Ok "All services healthy."
+    Invoke-ComposeUpWait
+    Write-Ok "All services started and healthy."

    Remove-Item $KeyFile -ErrorAction SilentlyContinue

 } else {
    Write-Step "Starting SurfSense"
-    Push-Location $InstallDir
-    Invoke-NativeSafe { docker compose up -d }
-    Pop-Location
-    Write-Ok "All containers started; waiting for stack to become healthy..."
-
-    $waitResult = Wait-StackHealthy -TimeoutSec 300
-    if (-not $waitResult.Ok) {
-        Invoke-StackFailureReport -Result $waitResult
-    }
-    Write-Ok "All services healthy."
+    Invoke-ComposeUpWait
+    Write-Ok "All services started and healthy."
 }

 # ── Watchtower (auto-update) ────────────────────────────────────────────────
@ -461,7 +512,7 @@ if ($SetupWatchtower) {
        if ($LASTEXITCODE -eq 0) {
            Write-Ok "Watchtower started - labeled SurfSense containers will auto-update."
        } else {
-            Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d"
+            Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d --wait"
        }
    }
 } else {
@ -471,39 +522,26 @@ if ($SetupWatchtower) {
 # ── Done ────────────────────────────────────────────────────────────────────

 Write-Host ""
-Write-Host @"
-
-
- .d8888b.                    .d888 .d8888b.                                      
-d88P  Y88b                  d88P" d88P  Y88b                                     
-Y88b.                       888   Y88b.                                          
- "Y888b.   888  888 888d888 888888 "Y888b.    .d88b.  88888b.  .d8888b   .d88b.  
-    "Y88b. 888  888 888P"   888       "Y88b. d8P  Y8b 888 "88b 88K      d8P  Y8b 
-      "888 888  888 888     888         "888 88888888 888  888 "Y8888b. 88888888 
-Y88b  d88P Y88b 888 888     888   Y88b  d88P Y8b.     888  888      X88 Y8b.     
- "Y8888P"   "Y88888 888     888    "Y8888P"   "Y8888  888  888  88888P'  "Y8888  
-
-
-"@ -ForegroundColor White
-
 $versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
 if (-not $versionDisplay) { $versionDisplay = "latest" }
-Write-Host "         OSS Alternative to NotebookLM for Teams  [$versionDisplay]" -ForegroundColor Yellow
-Write-Host ("=" * 62) -ForegroundColor Cyan
-Write-Host ""
+$variantDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VARIANT=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
+if (-not $variantDisplay) { $variantDisplay = "cpu" }
+$wtHours = [math]::Floor($WatchtowerInterval / 3600)
+Write-Step "SurfSense is now installed [$versionDisplay]"

 Write-Info "  Frontend:  http://localhost:3929"
 Write-Info "  Backend:   http://localhost:8929"
 Write-Info "  API Docs:  http://localhost:8929/docs"
 Write-Info ""
 Write-Info "  Config:    $InstallDir\.env"
+Write-Info "  Variant:   $variantDisplay"
 Write-Info "  Logs:      cd $InstallDir; docker compose logs -f"
 Write-Info "  Stop:      cd $InstallDir; docker compose down"
-Write-Info "  Update:    cd $InstallDir; docker compose pull; docker compose up -d"
+Write-Info "  Update:    cd $InstallDir; docker compose pull; docker compose up -d --wait"
 Write-Info ""

 if ($SetupWatchtower) {
-    Write-Info "  Watchtower: auto-updates every ${wtHours}h (stop: docker rm -f $WatchtowerContainer)"
+    Write-Info "  Watchtower: auto-updates every ${wtHours}h (disable: docker rm -f $WatchtowerContainer)"
 } else {
    Write-Warn "  Watchtower skipped. For auto-updates, re-run without -NoWatchtower."
 }
--- a/docker/scripts/install.sh
+++ b/docker/scripts/install.sh
@ -8,6 +8,11 @@
 # Flags:
 #   --no-watchtower              Skip automatic Watchtower setup
 #   --watchtower-interval=SECS   Check interval in seconds (default: 86400 = 24h)
+#   --variant=cpu|cuda|cuda126   Select backend image variant
+#   --gpu                        Alias for --variant=cuda
+#   --cpu                        Alias for --variant=cpu
+#   --gpu-count=N|all            Number of GPUs to reserve when GPU is enabled
+#   --quiet                      Skip interactive prompts
 #
 # Handles two cases automatically:
 #   1. Fresh install        — no prior SurfSense data detected
@ -35,12 +40,22 @@ MIGRATION_MODE=false
 SETUP_WATCHTOWER=true
 WATCHTOWER_INTERVAL=86400
 WATCHTOWER_CONTAINER="watchtower"
+WATCHTOWER_EXPLICIT=false
+REQUESTED_VARIANT=""
+VARIANT_EXPLICIT=false
+GPU_COUNT=""
+QUIET=false

 # ── Parse flags ─────────────────────────────────────────────────────────────
 for arg in "$@"; do
    case "$arg" in
-        --no-watchtower) SETUP_WATCHTOWER=false ;;
+        --no-watchtower) SETUP_WATCHTOWER=false; WATCHTOWER_EXPLICIT=true ;;
        --watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;;
+        --variant=*) REQUESTED_VARIANT="${arg#*=}"; VARIANT_EXPLICIT=true ;;
+        --gpu) REQUESTED_VARIANT="cuda"; VARIANT_EXPLICIT=true ;;
+        --cpu) REQUESTED_VARIANT="cpu"; VARIANT_EXPLICIT=true ;;
+        --gpu-count=*) GPU_COUNT="${arg#*=}" ;;
+        --quiet) QUIET=true ;;
    esac
 done

@ -57,6 +72,57 @@ warn()    { printf "${YELLOW}[SurfSense]${NC} %s\n"      "$1"; }
 error()   { printf "${RED}[SurfSense]${NC} ERROR: %s\n"  "$1" >&2; exit 1; }
 step()    { printf "\n${BOLD}${CYAN}── %s${NC}\n"        "$1"; }

+show_banner() {  # Prints the SurfSense ASCII-art banner, tagline and a one-line summary of what the installer does.
+    echo ""
+    printf '\033[1;37m'  # ANSI SGR 1;37 (bold white) for the banner art
+    cat << 'EOF'
+
+
+███████╗██╗   ██╗██████╗ ███████╗███████╗███████╗███╗   ██╗███████╗███████╗
+██╔════╝██║   ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗  ██║██╔════╝██╔════╝
+███████╗██║   ██║██████╔╝█████╗  ███████╗█████╗  ██╔██╗ ██║███████╗█████╗  
+╚════██║██║   ██║██╔══██╗██╔══╝  ╚════██║██╔══╝  ██║╚██╗██║╚════██║██╔══╝  
+███████║╚██████╔╝██║  ██║██║     ███████║███████╗██║ ╚████║███████║███████╗
+╚══════╝ ╚═════╝ ╚═╝  ╚═╝╚═╝     ╚══════╝╚══════╝╚═╝  ╚═══╝╚══════╝╚══════╝
+                                                                           
+
+EOF
+    printf "${YELLOW}         OSS Alternative to NotebookLM for Teams${NC}\n"
+    printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n"
+    info "This installer will create ${INSTALL_DIR}/ and start SurfSense with Docker Compose."
+}
+
+show_banner
+
+case "${REQUESTED_VARIANT}" in
+    ""|cpu|cuda|cuda126) ;;
+    *) error "Invalid --variant='${REQUESTED_VARIANT}'. Use cpu, cuda, or cuda126." ;;
+esac
+
+if [[ -n "${GPU_COUNT}" && ! "${GPU_COUNT}" =~ ^([0-9]+|all)$ ]]; then
+    error "Invalid --gpu-count='${GPU_COUNT}'. Use a number or 'all'."
+fi
+
+resolve_watchtower_preference() {
+    # Prompt for Watchtower unless a flag already decided, --quiet is set, or /dev/tty cannot be opened.
+    # Open it for real: -r/-w only test permission bits and pass even without a controlling terminal.
+    if $WATCHTOWER_EXPLICIT || $QUIET || ! ( : <>/dev/tty ) 2>/dev/null; then return 0; fi
+
+    local choice
+    echo "" > /dev/tty
+    printf "${BOLD}${CYAN}Automatic updates${NC}\n" > /dev/tty
+    printf "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]: " > /dev/tty
+    read -r choice < /dev/tty || choice=""
+
+    case "$choice" in
+        ""|[Yy]|[Yy][Ee][Ss]) SETUP_WATCHTOWER=true ;;
+        [Nn]|[Nn][Oo]) SETUP_WATCHTOWER=false ;;
+        *) warn "Unrecognized choice '${choice}', enabling Watchtower by default. Use --no-watchtower to skip it." >&2; SETUP_WATCHTOWER=true ;;
+    esac
+}
+
+resolve_watchtower_preference
+
 # ── Pre-flight checks ────────────────────────────────────────────────────────

 step "Checking prerequisites"
@ -97,126 +163,11 @@ wait_for_pg() {
    success "PostgreSQL is ready."
 }

-# ── Stack health helpers ─────────────────────────────────────────────────────
-
-# Enumerate compose services for project `surfsense` as `service|state|health|exitcode`
-# lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the
-# exact ordering of fields in `docker compose ps --format json` output.
-get_compose_services() {
-    local containers
-    containers=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true
-    [[ -z "$containers" ]] && return 0
-
-    while IFS= read -r container; do
-        [[ -z "$container" ]] && continue
-        local svc state health code
-        svc=$(docker inspect -f '{{index .Config.Labels "com.docker.compose.service"}}' "$container" 2>/dev/null || echo "")
-        state=$(docker inspect -f '{{.State.Status}}' "$container" 2>/dev/null || echo "unknown")
-        health=$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{end}}' "$container" 2>/dev/null || echo "")
-        code=$(docker inspect -f '{{.State.ExitCode}}' "$container" 2>/dev/null || echo "")
-        [[ -z "$svc" ]] && continue
-        printf '%s|%s|%s|%s\n' "$svc" "$state" "$health" "$code"
-    done <<< "$containers"
-}
-
-# Globals populated by wait_stack_healthy / consumed by stack_failure_report.
-STACK_BAD=()
-STACK_WAITING=()
-STACK_GOOD=()
-STACK_TIMEOUT=false
-
-wait_stack_healthy() {
-    local timeout_sec=${1:-300}
-    local deadline=$(($(date +%s) + timeout_sec))
-    local last_report=""
-    local bad=()
-    local waiting=()
-    local good=()
-
-    while [[ $(date +%s) -lt $deadline ]]; do
-        local lines
-        lines=$(get_compose_services)
-        if [[ -z "$lines" ]]; then
-            sleep 3
-            continue
-        fi
-
-        bad=()
-        waiting=()
-        good=()
-
-        while IFS='|' read -r name state health code; do
-            [[ -z "$name" ]] && continue
-            if [[ "$name" == "migrations" ]]; then
-                if [[ "$state" == "exited" && "$code" == "0" ]]; then
-                    good+=("$name")
-                elif [[ "$state" == "exited" ]]; then
-                    bad+=("${name} (exit=${code})")
-                else
-                    waiting+=("${name} (${state})")
-                fi
-                continue
-            fi
-
-            if [[ "$state" == "running" ]]; then
-                if [[ -z "$health" || "$health" == "healthy" ]]; then
-                    good+=("$name")
-                elif [[ "$health" == "starting" ]]; then
-                    waiting+=("${name} (starting)")
-                elif [[ "$health" == "unhealthy" ]]; then
-                    bad+=("${name} (unhealthy)")
-                else
-                    waiting+=("${name} (${health})")
-                fi
-            elif [[ "$state" == "restarting" ]]; then
-                bad+=("${name} (restarting)")
-            elif [[ "$state" == "exited" ]]; then
-                bad+=("${name} (exited, code=${code})")
-            else
-                waiting+=("${name} (${state})")
-            fi
-        done <<< "$lines"
-
-        if (( ${#bad[@]} > 0 )); then
-            STACK_BAD=("${bad[@]}")
-            STACK_WAITING=("${waiting[@]}")
-            STACK_GOOD=("${good[@]}")
-            return 1
-        fi
-        if (( ${#waiting[@]} == 0 )); then
-            STACK_GOOD=("${good[@]}")
-            return 0
-        fi
-
-        local report="Waiting on: ${waiting[*]}"
-        if [[ "$report" != "$last_report" ]]; then
-            info "$report"
-            last_report="$report"
-        fi
-        sleep 5
-    done
-
-    # bad/waiting/good are declared at function scope so referencing them is
-    # safe even if the polling loop never executed its body.
-    STACK_BAD=()
-    [[ ${#bad[@]} -gt 0 ]] && STACK_BAD=("${bad[@]}")
-    STACK_WAITING=()
-    [[ ${#waiting[@]} -gt 0 ]] && STACK_WAITING=("${waiting[@]}")
-    STACK_GOOD=()
-    [[ ${#good[@]} -gt 0 ]] && STACK_GOOD=("${good[@]}")
-    STACK_TIMEOUT=true
-    return 1
-}
+# ── Stack startup helper ─────────────────────────────────────────────────────

 stack_failure_report() {
    echo ""
    echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state."
-    if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then
-        echo "  Failed: ${STACK_BAD[*]}"
-    fi
-    if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then
-        echo "  Stuck:  ${STACK_WAITING[*]}"
-    fi
    echo ""
    info "Recent logs from migrations / zero-cache / backend:"
    (cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true
@ -224,36 +175,158 @@ stack_failure_report() {
    echo "Recovery hints:"
    echo "  1. Inspect migrations:   cd ${INSTALL_DIR} && ${DC} logs migrations"
    echo "  2. Verify publication:   cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
-    echo "  3. Hard reset zero db:   cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d"
+    echo "  3. Hard reset zero db:   cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d --wait"
    echo ""
    exit 1
 }

-# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not.
-# That signals an install that predates the migrations-service fix; the old
-# replica may be half-initialized and would block zero-cache on next start.
-test_stale_zero_cache_volume() {
-    local has_zc has_zi
-    has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true)
-    has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true)
-    [[ -n "$has_zc" && -z "$has_zi" ]]
+compose_up_wait() {
+    local service="${1:-}"
+    if [[ -n "$service" ]]; then
+        (cd "${INSTALL_DIR}" && ${DC} up -d --wait "$service") < /dev/null
+    else
+        (cd "${INSTALL_DIR}" && ${DC} up -d --wait) < /dev/null
+    fi
 }

-invoke_stale_zero_cache_cleanup() {
-    if ! test_stale_zero_cache_volume; then
+# ── Variant and .env helpers ─────────────────────────────────────────────────
+
+set_env_value() {
+    local file="$1"
+    local key="$2"
+    local value="$3"
+    local tmp
+    tmp=$(mktemp)
+
+    if grep -q "^${key}=" "$file" 2>/dev/null; then
+        awk -v key="$key" -v value="$value" 'BEGIN { prefix = key "=" } $0 ~ "^" prefix { print prefix value; next } { print }' "$file" > "$tmp"
+    else
+        cp "$file" "$tmp"
+        printf '\n%s=%s\n' "$key" "$value" >> "$tmp"
+    fi
+    mv "$tmp" "$file"
+}
+
+# Drop every line that starts with "KEY=" from an env file.
+#   $1 - path to the env file
+#   $2 - key whose assignments are removed
+remove_env_value() {
+    local file="$1"
+    local key="$2"
+    local scratch
+    scratch=$(mktemp)
+    # Filter into a scratch file, then swap it in over the original.
+    awk -v key="$key" '$0 ~ ("^" key "=") { next } { print }' "$file" > "$scratch"
+    mv "$scratch" "$file"
+}
+
+version_major() {
+    printf '%s' "$1" | cut -d. -f1
+}
+
+recommend_cuda_variant() {
+    local driver_version driver_major
+    driver_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n 1 | tr -d '[:space:]' || true)
+    driver_major=$(version_major "$driver_version")
+
+    # CUDA 12.8 generally requires an R570+ driver. Use CUDA 12.6 as the
+    # compatibility fallback for older 12.x driver stacks and GPUs.
+    if [[ "$driver_major" =~ ^[0-9]+$ && "$driver_major" -lt 570 ]]; then
+        printf 'cuda126'
+    else
+        printf 'cuda'
+    fi
+}
+
+gpu_runtime_available() {
+    docker info 2>/dev/null | grep -qi 'nvidia' \
+        || command -v nvidia-ctk >/dev/null 2>&1 \
+        || command -v nvidia-container-runtime >/dev/null 2>&1
+}
+
+# Succeed only when nvidia-smi is on PATH and runs without error.
+host_has_nvidia_gpu() {
+    command -v nvidia-smi >/dev/null 2>&1 || return
+    nvidia-smi >/dev/null 2>&1
+}
+
+resolve_variant() {
+    local detected_variant="cpu"
+    local has_gpu=false
+    local has_runtime=false
+
+    if host_has_nvidia_gpu; then
+        has_gpu=true
+        detected_variant=$(recommend_cuda_variant)
+        if gpu_runtime_available; then
+            has_runtime=true
+        fi
+    fi
+
+    if $VARIANT_EXPLICIT; then
+        if [[ "$REQUESTED_VARIANT" == "cpu" ]]; then
+            printf 'cpu'
+            return 0
+        fi
+        if ! $has_gpu; then
+            warn "No NVIDIA GPU detected; falling back to CPU variant." >&2
+            printf 'cpu'
+            return 0
+        fi
+        if ! $has_runtime; then
+            warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant." >&2
+            warn "Install the toolkit before enabling SurfSense GPU acceleration." >&2
+            printf 'cpu'
+            return 0
+        fi
+        printf '%s' "$REQUESTED_VARIANT"
        return 0
    fi
-    warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
-    warn "predates the migrations-service fix. It may contain a half-initialized"
-    warn "SQLite replica that would block zero-cache from starting."
-    warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
-    sleep 5

-    (cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true
-    docker volume rm surfsense-zero-cache 2>/dev/null || true
-    success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
+    if $has_gpu && ! $has_runtime; then
+        warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant." >&2
+    fi
+
+    if $has_gpu && $has_runtime && ! $QUIET && [[ -r /dev/tty && -w /dev/tty ]]; then
+        local choice
+        echo "" > /dev/tty
+        printf "${BOLD}${CYAN}SurfSense detected an NVIDIA GPU.${NC}\n" > /dev/tty
+        printf "Use GPU acceleration? [Y/n]: " > /dev/tty
+        read -r choice < /dev/tty || choice=""
+        case "$choice" in
+            "") printf '%s' "$detected_variant" ;;
+            [Yy]|[Yy][Ee][Ss]) printf '%s' "$detected_variant" ;;
+            [Nn]|[Nn][Oo]) printf 'cpu' ;;
+            *) warn "Unrecognized choice '${choice}', using CPU variant." >&2; printf 'cpu' ;;
+        esac
+        return 0
+    fi
+
+    printf 'cpu'
 }

+# Record the chosen image variant in an env file.
+#   $1 - path to the env file
+#   $2 - variant name; 'cpu' clears the GPU settings, any other value enables
+#        the GPU compose overlay
+#   $3 - "true" to permit editing a file that already exists; with any other
+#        value an existing file is left untouched and a hint is printed
+apply_variant_env() {
+    local env_file="$1"
+    local variant="$2"
+    local allow_existing_update="$3"
+
+    if [[ "$allow_existing_update" != "true" && -f "$env_file" ]]; then
+        warn ".env already exists — keeping your existing configuration."
+        info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in ${env_file}, then run ${DC} up -d --wait."
+        return 0
+    fi
+
+    case "$variant" in
+        cpu)
+            set_env_value "$env_file" "SURFSENSE_VARIANT" ""
+            remove_env_value "$env_file" "COMPOSE_FILE"
+            remove_env_value "$env_file" "SURFSENSE_GPU_COUNT"
+            ;;
+        *)
+            set_env_value "$env_file" "SURFSENSE_VARIANT" "$variant"
+            set_env_value "$env_file" "COMPOSE_FILE" "docker-compose.yml:docker-compose.gpu.yml"
+            # SURFSENSE_GPU_COUNT is only written when a count was supplied.
+            if [[ -n "$GPU_COUNT" ]]; then
+                set_env_value "$env_file" "SURFSENSE_GPU_COUNT" "$GPU_COUNT"
+            fi
+            ;;
+    esac
+
+    # COMPOSE_PROFILES is removed for every variant.
+    remove_env_value "$env_file" "COMPOSE_PROFILES"
+}
+
+SELECTED_VARIANT=$(resolve_variant)
+
 # ── Download files ───────────────────────────────────────────────────────────

 step "Downloading SurfSense files"
@ -263,6 +336,7 @@ mkdir -p "${INSTALL_DIR}/searxng"

 FILES=(
    "docker/docker-compose.yml:docker-compose.yml"
+    "docker/docker-compose.gpu.yml:docker-compose.gpu.yml"
    "docker/.env.example:.env.example"
    "docker/postgresql.conf:postgresql.conf"
    "docker/scripts/migrate-database.sh:scripts/migrate-database.sh"
@ -336,15 +410,19 @@ if [ ! -f "${INSTALL_DIR}/.env" ]; then
    else
        sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
    fi
+    # The .env file was created just above, so apply_variant_env must be allowed
+    # to edit it: with "false" it sees the file, warns that it "already exists",
+    # and returns without recording the selected variant.
+    apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
    info "Created ${INSTALL_DIR}/.env"
 else
-    warn ".env already exists — keeping your existing configuration."
+    if $VARIANT_EXPLICIT; then
+        apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
+        info "Updated SurfSense image variant in existing ${INSTALL_DIR}/.env"
+    else
+        apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
+    fi
 fi

 # ── Start containers ─────────────────────────────────────────────────────────

-invoke_stale_zero_cache_cleanup
-
 if $MIGRATION_MODE; then
    # Read DB credentials from .env (fall back to defaults from docker-compose.yml)
    DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
@ -401,26 +479,20 @@ if $MIGRATION_MODE; then
    fi

    step "Starting all SurfSense services"
-    (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
-    success "All containers started; waiting for stack to become healthy..."
-
-    if ! wait_stack_healthy 300; then
+    if ! compose_up_wait; then
        stack_failure_report
    fi
-    success "All services healthy."
+    success "All services started and healthy."

    # Key file is no longer needed — SECRET_KEY is now in .env
    rm -f "${KEY_FILE}"

 else
    step "Starting SurfSense"
-    (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
-    success "All containers started; waiting for stack to become healthy..."
-
-    if ! wait_stack_healthy 300; then
+    if ! compose_up_wait; then
        stack_failure_report
    fi
-    success "All services healthy."
+    success "All services started and healthy."
 fi

 # ── Watchtower (auto-update) ─────────────────────────────────────────────────
@ -445,7 +517,7 @@ if $SETUP_WATCHTOWER; then
            --label-enable \
            --interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \
            && success "Watchtower started — labeled SurfSense containers will auto-update." \
-            || warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d"
+            || warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d --wait"
    fi
 else
    info "Skipping Watchtower setup (--no-watchtower flag)."
@ -454,38 +526,25 @@ fi
 # ── Done ─────────────────────────────────────────────────────────────────────

 echo ""
-printf '\033[1;37m'
-cat << 'EOF'
-
-
- .d8888b.                    .d888 .d8888b.                                      
-d88P  Y88b                  d88P" d88P  Y88b                                     
-Y88b.                       888   Y88b.                                          
- "Y888b.   888  888 888d888 888888 "Y888b.    .d88b.  88888b.  .d8888b   .d88b.  
-    "Y88b. 888  888 888P"   888       "Y88b. d8P  Y8b 888 "88b 88K      d8P  Y8b 
-      "888 888  888 888     888         "888 88888888 888  888 "Y8888b. 88888888 
-Y88b  d88P Y88b 888 888     888   Y88b  d88P Y8b.     888  888      X88 Y8b.     
- "Y8888P"   "Y88888 888     888    "Y8888P"   "Y8888  888  888  88888P'  "Y8888  
-
-
-EOF
 _version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
 _version_display="${_version_display:-latest}"
-printf "         OSS Alternative to NotebookLM for Teams  ${YELLOW}[%s]${NC}\n" "${_version_display}"
-printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n"
+_variant_display=$(grep '^SURFSENSE_VARIANT=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
+_variant_display="${_variant_display:-cpu}"
+step "SurfSense is now installed [${_version_display}]"

 info "  Frontend:  http://localhost:3929"
 info "  Backend:   http://localhost:8929"
 info "  API Docs:  http://localhost:8929/docs"
 info ""
 info "  Config:    ${INSTALL_DIR}/.env"
+info "  Variant:   ${_variant_display}"
 info "  Logs:      cd ${INSTALL_DIR} && ${DC} logs -f"
 info "  Stop:      cd ${INSTALL_DIR} && ${DC} down"
-info "  Update:    cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d"
+info "  Update:    cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d --wait"
 info ""

 if $SETUP_WATCHTOWER; then
-    info "  Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (stop: docker rm -f ${WATCHTOWER_CONTAINER})"
+    info "  Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (disable: docker rm -f ${WATCHTOWER_CONTAINER})"
 else
    warn "  Watchtower skipped. For auto-updates, re-run without --no-watchtower."
 fi
--- a/docs/chinese-llm-setup.md
+++ b/docs/chinese-llm-setup.md
@ -212,9 +212,9 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4

 | 字段 | 值 | 说明 |
 |------|-----|------|
-| **Configuration Name** | `MiniMax M2.5` | 配置名称（自定义） |
+| **Configuration Name** | `MiniMax M3` | 配置名称（自定义） |
 | **Provider** | `MINIMAX` | 选择 MiniMax |
-| **Model Name** | `MiniMax-M2.5` | 推荐模型<br>其他选项: `MiniMax-M2.5-highspeed` |
+| **Model Name** | `MiniMax-M3` | 推荐模型<br>其他选项: `MiniMax-M2.7`、`MiniMax-M2.7-highspeed` |
 | **API Key** | `eyJ...` | 你的 MiniMax API Key |
 | **API Base URL** | `https://api.minimax.io/v1` | MiniMax API 地址 |
 | **Parameters** | `{"temperature": 1.0}` | 注意：temperature 必须在 (0.0, 1.0] 范围内，不能为 0 |
@ -222,22 +222,23 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
 ### 示例配置

 ```
-Configuration Name: MiniMax M2.5
+Configuration Name: MiniMax M3
 Provider: MINIMAX
-Model Name: MiniMax-M2.5
+Model Name: MiniMax-M3
 API Key: eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 API Base URL: https://api.minimax.io/v1
 ```

 ### 可用模型

- **MiniMax-M2.5**: 高性能通用模型，204K 上下文窗口（推荐）
- **MiniMax-M2.5-highspeed**: 高速推理版本，204K 上下文窗口
+- **MiniMax-M3**: 旗舰模型，512K 上下文窗口（推荐）
+- **MiniMax-M2.7**: 上一代通用模型，204K 上下文窗口
+- **MiniMax-M2.7-highspeed**: 上一代高速推理版本，204K 上下文窗口

 ### 注意事项

 - **temperature 参数**: MiniMax 要求 temperature 必须在 (0.0, 1.0] 范围内，不能设置为 0。建议使用 1.0。
- 两个模型都支持 204K 超长上下文窗口，适合处理长文本任务。
+- M3 支持 512K 超长上下文，M2.7 系列保留 204K，适合按需求选择。

 ### 定价
 - 请访问 [MiniMax 定价页面](https://platform.minimaxi.com/document/Price) 查看最新价格
@ -315,8 +316,8 @@ docker compose logs backend | grep -i "error"
 |---------|---------|------|
 | **文档摘要** | Qwen-Plus, GLM-4 | 平衡性能和成本 |
 | **代码分析** | DeepSeek-Coder | 代码专用 |
-| **长文本处理** | Kimi 128K, MiniMax-M2.5 (204K) | 超长上下文 |
-| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.5-highspeed | 速度优先 |
+| **长文本处理** | Kimi 128K, MiniMax-M3 (512K) | 超长上下文 |
+| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.7-highspeed | 速度优先 |

 ### 2. 成本优化

--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -3,18 +3,46 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
 # Deployment environment: dev or production
 SURFSENSE_ENV=dev

-#Celery Config
-CELERY_BROKER_URL=redis://localhost:6379/0
-CELERY_RESULT_BACKEND=redis://localhost:6379/0
+# Redis (single endpoint for Celery broker/result backend + app features)
+REDIS_URL=redis://localhost:6379/0
+# Optional: override individually only to split Redis across instances.
+# Each defaults to REDIS_URL when unset.
+# CELERY_BROKER_URL=redis://localhost:6379/0
+# CELERY_RESULT_BACKEND=redis://localhost:6379/0
+# REDIS_APP_URL=redis://localhost:6379/0
 # Optional: isolate queues when sharing Redis with other apps
 CELERY_TASK_DEFAULT_QUEUE=surfsense
-
-# Redis for app-level features (heartbeats, podcast markers)
-# Defaults to CELERY_BROKER_URL when not set
-REDIS_APP_URL=redis://localhost:6379/0
 # Optional: TTL in seconds for connector indexing lock key
 # CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800

+# Messaging Gateway (global)
+# GATEWAY_ENABLED: master switch for ALL messaging gateway channels (Telegram, WhatsApp,
+# Slack, Discord). When FALSE, no gateway background workers/supervisors start and all
+# gateway HTTP routes (webhooks, OAuth callbacks, pairing) return 404. Set per-channel
+# flags below to control individual platforms once the gateway is enabled.
+GATEWAY_ENABLED=TRUE
+
+# Telegram Gateway
+# TELEGRAM_WEBHOOK_SECRET must be 1-256 chars and contain only A-Z, a-z, 0-9, _ or -
+# GATEWAY_TELEGRAM_INTAKE_MODE: `webhook` for production, `longpoll` for single-replica self-host fallback, `disabled` to skip Telegram intake
+TELEGRAM_SHARED_BOT_TOKEN=
+TELEGRAM_SHARED_BOT_USERNAME=
+TELEGRAM_WEBHOOK_SECRET=
+GATEWAY_BASE_URL=http://localhost:8000
+GATEWAY_TELEGRAM_INTAKE_MODE=webhook
+
+# WhatsApp Gateway
+# GATEWAY_WHATSAPP_INTAKE_MODE: `cloud` for Meta Cloud API, `baileys` for self-hosted bridge, `disabled` to skip WhatsApp intake
+GATEWAY_WHATSAPP_INTAKE_MODE=disabled
+WHATSAPP_SHARED_BUSINESS_TOKEN=
+WHATSAPP_SHARED_PHONE_NUMBER_ID=
+WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
+WHATSAPP_SHARED_WABA_ID=
+WHATSAPP_GRAPH_API_VERSION=v25.0
+WHATSAPP_WEBHOOK_VERIFY_TOKEN=
+WHATSAPP_WEBHOOK_APP_SECRET=
+WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
+
 # Platform Web Search (SearXNG)
 # Set this to enable built-in web search. Docker Compose sets it automatically.
 # Only uncomment if running the backend outside Docker (e.g. uvicorn on host).
@ -64,8 +92,6 @@ STRIPE_PAGE_BUYING_ENABLED=TRUE
 STRIPE_TOKEN_BUYING_ENABLED=FALSE
 STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
 STRIPE_CREDIT_MICROS_PER_UNIT=1000000
-# DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
-# STRIPE_TOKENS_PER_UNIT=1000000

 # Periodic Stripe safety net for purchases left in PENDING (minutes old)
 STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
@ -98,11 +124,14 @@ CLICKUP_CLIENT_ID=your_clickup_client_id_here
 CLICKUP_CLIENT_SECRET=your_clickup_client_secret_here
 CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback

-# Discord OAuth Configuration
+# Discord OAuth / Gateway Configuration
+# The Discord connector and Discord gateway use the same Discord application/bot.
 DISCORD_CLIENT_ID=your_discord_client_id_here
 DISCORD_CLIENT_SECRET=your_discord_client_secret_here
 DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
 DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal
+GATEWAY_DISCORD_ENABLED=FALSE
+GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/gateway/discord/callback

 # Atlassian OAuth Configuration (Jira & Confluence)
 ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here
@ -120,10 +149,14 @@ NOTION_CLIENT_ID=your_notion_client_id_here
 NOTION_CLIENT_SECRET=your_notion_client_secret_here
 NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback

-# Slack OAuth Configuration
+# Slack OAuth / Gateway Configuration
+# The Slack connector and Slack gateway can use the same Slack app client ID/secret.
 SLACK_CLIENT_ID=your_slack_client_id_here
 SLACK_CLIENT_SECRET=your_slack_client_secret_here
 SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback
+GATEWAY_SLACK_ENABLED=FALSE
+GATEWAY_SLACK_SIGNING_SECRET=your_slack_signing_secret_here
+GATEWAY_SLACK_REDIRECT_URI=http://localhost:8000/api/v1/gateway/slack/callback

 # Microsoft OAuth (Teams & OneDrive)
 MICROSOFT_CLIENT_ID=your_microsoft_client_id_here
@ -197,8 +230,6 @@ PAGES_LIMIT=500
 # models bill proportionally. Applies only to models with
 # billing_tier=premium in global_llm_config.yaml.
 PREMIUM_CREDIT_MICROS_LIMIT=5000000
-# DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
-# PREMIUM_TOKEN_LIMIT=5000000

 # Safety ceiling on per-call premium reservation, in micro-USD.
 # stream_new_chat estimates an upper-bound cost from the model's
@ -246,17 +277,19 @@ TURNSTILE_ENABLED=FALSE
 TURNSTILE_SECRET_KEY=


+# Proxy provider selection. Selects a ProxyProvider implementation registered in
+# app/utils/proxy/registry.py. Default: "anonymous_proxies". Add new vendors there.
+# PROXY_PROVIDER=anonymous_proxies
+
 # Residential Proxy Configuration (anonymous-proxies.net)
 # Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
-# Leave commented out to disable proxying.
+# Consumed by the "anonymous_proxies" provider. Leave commented out to disable proxying.
 # RESIDENTIAL_PROXY_USERNAME=your_proxy_username
 # RESIDENTIAL_PROXY_PASSWORD=your_proxy_password
 # RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230
 # RESIDENTIAL_PROXY_LOCATION=
 # RESIDENTIAL_PROXY_TYPE=1

-FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
-
 # File Parser Service
 ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
 UNSTRUCTURED_API_KEY=Tpu3P0U8iy
@ -265,6 +298,16 @@ LLAMA_CLOUD_API_KEY=llx-nnn
 # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
 # AZURE_DI_KEY=your-key

+# Original File Storage
+# Where to persist the original bytes of uploaded documents (for download today,
+# redaction / form-filling later). "local" needs no cloud creds and is the dev default.
+FILE_STORAGE_BACKEND=local
+# Local backend: directory for stored files (defaults to surfsense_backend/.local_object_store)
+# FILE_STORAGE_LOCAL_PATH=/var/lib/surfsense/object-store
+# Azure Blob backend (set FILE_STORAGE_BACKEND=azure):
+# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
+# AZURE_STORAGE_CONTAINER=surfsense-documents
+
 # Daytona Sandbox (isolated code execution)
 # DAYTONA_SANDBOX_ENABLED=FALSE
 # DAYTONA_API_KEY=your-daytona-api-key
@ -285,9 +328,6 @@ LANGSMITH_PROJECT=surfsense
 # =============================================================================
 # OPTIONAL: New-chat agent feature flags
 # =============================================================================
-# Multi-agent orchestrator switch for authenticated chat streaming.
-# MULTI_AGENT_CHAT_ENABLED=false
-
 # Master kill-switch — when true, every flag below is forced OFF.
 # SURFSENSE_DISABLE_NEW_AGENT_STACK=false

@ -322,6 +362,13 @@ LANGSMITH_PROJECT=surfsense
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
 # SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false

+# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
+# KB content on demand via the `search_knowledge_base` tool and skips the
+# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
+# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
+# restore the original eager `<priority_documents>` pre-injection.
+# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
+
 # Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
 # SURFSENSE_ENABLE_REVERT_ROUTE=false        # Backend-only; flip when UI ships
@ -342,6 +389,15 @@ LANGSMITH_PROJECT=surfsense
 # rollback if you suspect cache-related staleness.
 # SURFSENSE_ENABLE_AGENT_CACHE=true

+# Cross-thread reuse (default ON). Drops thread_id from the cache key so a
+# returning user's NEW chats (same user + search space + config + visibility)
+# hit the already-compiled graph instead of paying a fresh ~4-5s compile —
+# turning a cold first turn into a warm one. Safe because ActionLog,
+# KB-persistence, and the deliverables tools now resolve the chat thread from
+# the live RunnableConfig at call time rather than a build-time closure. Flip
+# OFF to fall back to a per-thread cache key (instant rollback).
+# SURFSENSE_ENABLE_CROSS_THREAD_AGENT_CACHE=true
+
 # Cache capacity (max number of compiled-agent entries kept in memory)
 # and TTL per entry (seconds). Working set is typically one entry per
 # active thread on this replica; tune up for very large deployments.
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -2,6 +2,7 @@
 .venv
 venv/
 data/
+.local_object_store/
 __pycache__/
 .flashrank_cache
 surf_new_backend.egg-info/
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -1,3 +1,4 @@
+# syntax=docker.io/docker/dockerfile:1
 # =============================================================================
 # SurfSense Backend — Multi-stage Dockerfile
 # =============================================================================
@ -61,15 +62,25 @@ COPY pyproject.toml uv.lock ./
 # Exporting the lock to requirements.txt and feeding it to `uv pip install`
 # pins every transitive package to the exact version captured in uv.lock.
 #
-# Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
-# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
-# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
-# captured in uv.lock). If a specific CUDA version is needed, wire it through
-# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
+# Note on torch/CUDA: the export must always select either the cpu or CUDA
+# extra declared in pyproject.toml. A no-extra export would resolve torch from
+# PyPI on Linux, which currently pulls CUDA-enabled wheels and nvidia-* deps.
+# Keep CUDA version selection in [tool.uv.sources] so uv.lock remains the
+# source of truth. The install step also needs the matching PyTorch index,
+# because requirements.txt preserves the +cpu/+cu wheel pins but not uv's
+# package source metadata.
+ARG USE_CUDA=false
+ARG CUDA_EXTRA=cu128
 RUN pip install --no-cache-dir uv && \
+    if [ "$USE_CUDA" = "true" ]; then EXTRA="$CUDA_EXTRA"; else EXTRA="cpu"; fi && \
+    TORCH_INDEX="https://download.pytorch.org/whl/${EXTRA}" && \
    uv export --frozen --no-dev --no-hashes --no-emit-project \
+        --extra "$EXTRA" \
        --format requirements-txt -o /tmp/requirements.txt && \
-    uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
+    uv pip install --system --no-cache-dir \
+        --index "$TORCH_INDEX" \
+        --index-strategy unsafe-best-match \
+        -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt


@ -94,10 +105,14 @@ RUN printf '%s\n' \
    | python || true

 ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
+RUN --mount=type=secret,id=HF_TOKEN \
+    HF_TOKEN="$(cat /run/secrets/HF_TOKEN 2>/dev/null || true)" \
+    python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"

-# Install Playwright browsers (the playwright python package itself is in deps)
-RUN playwright install chromium --with-deps
+# Install Scrapling's browser engines (patchright Chromium + Camoufox).
+# Scrapling pulls playwright/patchright via the `fetchers` extra; `scrapling install`
+# downloads the matching browser binaries used by DynamicFetcher/StealthyFetcher.
+RUN scrapling install

 # Shared temp directory for file uploads between API and Worker containers.
 # Python's tempfile module uses TMPDIR, so uploaded files land here.
--- a/surfsense_backend/alembic/env.py
+++ b/surfsense_backend/alembic/env.py
@ -3,6 +3,7 @@ import os
 import sys
 from logging.config import fileConfig

+import sqlalchemy as sa
 from sqlalchemy import pool
 from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import async_engine_from_config
@ -36,6 +37,9 @@ if config.config_file_name is not None:
 # target_metadata = mymodel.Base.metadata
 target_metadata = Base.metadata

+MIGRATION_ADVISORY_LOCK_NAMESPACE = "surfsense"
+MIGRATION_ADVISORY_LOCK_NAME = "alembic_migrations"
+
 # other values from the config, defined by the needs of env.py,
 # can be acquired:
 # my_important_option = config.get_main_option("my_important_option")
@ -73,8 +77,22 @@ def do_run_migrations(connection: Connection) -> None:
        transaction_per_migration=True,
    )

-    with context.begin_transaction():
-        context.run_migrations()
+    lock_params = {
+        "namespace": MIGRATION_ADVISORY_LOCK_NAMESPACE,
+        "name": MIGRATION_ADVISORY_LOCK_NAME,
+    }
+    connection.execute(
+        sa.text("SELECT pg_advisory_lock(hashtext(:namespace), hashtext(:name))"),
+        lock_params,
+    )
+    try:
+        with context.begin_transaction():
+            context.run_migrations()
+    finally:
+        connection.execute(
+            sa.text("SELECT pg_advisory_unlock(hashtext(:namespace), hashtext(:name))"),
+            lock_params,
+        )


 async def run_async_migrations() -> None:
--- a/surfsense_backend/alembic/versions/143_force_zero_publication_resync.py
+++ b/surfsense_backend/alembic/versions/143_force_zero_publication_resync.py
@ -47,7 +47,6 @@ depends_on: str | Sequence[str] | None = None

 PUBLICATION_NAME = "zero_publication"

-# Must stay in sync with the column lists in migrations 117 / 139 / 140.
 DOCUMENT_COLS = [
    "id",
    "title",
--- a/surfsense_backend/alembic/versions/148_add_automation_runs_to_zero_publication.py
+++ b/surfsense_backend/alembic/versions/148_add_automation_runs_to_zero_publication.py
@ -0,0 +1,175 @@
+"""add automation_runs to zero_publication with thin column list
+
+Publishes ``automation_runs`` so the dashboard can replace polling with a
+live run status + per-step ticker. Only the columns the list and ticker
+read are exposed (``id, automation_id, trigger_id, status, step_results,
+started_at, finished_at, created_at``); heavy JSONB
+(``definition_snapshot``, ``inputs``, ``output``, ``artifacts``, ``error``)
+stays on REST and is fetched lazily on detail expand.
+
+Uses the canonical ``ALTER PUBLICATION ... SET TABLE`` + ``COMMENT``
+bookend pattern (see migration 143) -- the shape Zero ``>=1.0`` requires
+to fire its schema-change hook. Existing tables are re-emitted unchanged.
+
+Revision ID: 148
+Revises: 147
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# Alembic revision identifiers.
+revision: str = "148"
+down_revision: str | None = "147"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Publication whose table list this migration rewrites.
+PUBLICATION_NAME = "zero_publication"
+
+# Mirrors migration 143. Kept in sync explicitly: any change to these lists
+# must be re-emitted in a new resync migration with COMMENT bookends.
+# ``"_0_version"`` is appended at runtime when the table has that column.
+DOCUMENT_COLS = [
+    "id",
+    "title",
+    "document_type",
+    "search_space_id",
+    "folder_id",
+    "created_by_id",
+    "status",
+    "created_at",
+    "updated_at",
+]
+
+# Columns published for the "user" table; ``"_0_version"`` is appended at
+# runtime when the table has that column.
+USER_COLS = [
+    "id",
+    "pages_limit",
+    "pages_used",
+    "premium_credit_micros_limit",
+    "premium_credit_micros_used",
+]
+
+# Thin set: status + lightweight progress only. Heavy JSONB stays on REST.
+AUTOMATION_RUN_COLS = [
+    "id",
+    "automation_id",
+    "trigger_id",
+    "status",
+    "step_results",
+    "started_at",
+    "finished_at",
+    "created_at",
+]
+
+
+def _has_zero_version(conn, table: str) -> bool:
+    """Report whether ``table`` has a ``_0_version`` column.
+
+    The lookup goes through ``information_schema.columns`` and matches on the
+    table name only; it is not restricted to a particular schema.
+    """
+    probe = sa.text(
+        "SELECT 1 FROM information_schema.columns "
+        "WHERE table_name = :tbl AND column_name = '_0_version'"
+    )
+    found = conn.execute(probe, {"tbl": table}).fetchone()
+    return found is not None
+
+
+def _build_set_table_ddl(
+    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
+) -> str:
+    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for revision 148.
+
+    ``documents``, ``"user"`` and ``automation_runs`` are published with
+    explicit column lists; the remaining tables are published whole. The
+    quoted ``"_0_version"`` column is appended to the ``documents`` / ``"user"``
+    lists only when the matching flag is true.
+    """
+    zero_version = ['"_0_version"']
+    document_columns = ", ".join(
+        DOCUMENT_COLS + (zero_version if documents_has_zero_ver else [])
+    )
+    user_columns = ", ".join(USER_COLS + (zero_version if user_has_zero_ver else []))
+    automation_run_columns = ", ".join(AUTOMATION_RUN_COLS)
+
+    published = [
+        "notifications",
+        f"documents ({document_columns})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({user_columns})',
+        f"automation_runs ({automation_run_columns})",
+    ]
+    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
+
+
+def upgrade() -> None:
+    """Add ``automation_runs`` to the publication, if the publication exists.
+
+    Emits a COMMENT, the ``ALTER PUBLICATION ... SET TABLE`` statement and a
+    second COMMENT inside a single transaction (a SAVEPOINT when the
+    connection is already in one). Returns without touching anything when
+    ``zero_publication`` is absent.
+    """
+    bind = op.get_bind()
+
+    publication_row = bind.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if not publication_row:
+        return
+
+    set_table_ddl = _build_set_table_ddl(
+        documents_has_zero_ver=_has_zero_version(bind, "documents"),
+        user_has_zero_ver=_has_zero_version(bind, "user"),
+    )
+    statements = (
+        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-resync'",
+        set_table_ddl,
+        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-resync'",
+    )
+
+    # The COMMENT-ALTER-COMMENT trio has to land in one transaction so Zero
+    # sees a single schema-change event; this is the same SAVEPOINT pattern
+    # as migrations 117 / 139 / 140 / 143.
+    if bind.in_transaction():
+        scope = bind.begin_nested()
+    else:
+        scope = bind.begin()
+    with scope:
+        for statement in statements:
+            bind.execute(sa.text(statement))
+
+
+def downgrade() -> None:
+    """Re-emit migration 143's shape (no automation_runs).
+
+    Does nothing when ``zero_publication`` is absent. Otherwise the table
+    list without ``automation_runs`` is applied between two COMMENT
+    statements inside one transaction (a SAVEPOINT when already in one).
+    """
+    bind = op.get_bind()
+
+    publication_row = bind.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if not publication_row:
+        return
+
+    documents_has_zero_ver = _has_zero_version(bind, "documents")
+    user_has_zero_ver = _has_zero_version(bind, "user")
+
+    zero_version = ['"_0_version"']
+    document_columns = ", ".join(
+        DOCUMENT_COLS + (zero_version if documents_has_zero_ver else [])
+    )
+    user_columns = ", ".join(USER_COLS + (zero_version if user_has_zero_ver else []))
+    published = [
+        "notifications",
+        f"documents ({document_columns})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({user_columns})',
+    ]
+    statements = (
+        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-downgrade'",
+        f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published),
+        f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-downgrade'",
+    )
+
+    if bind.in_transaction():
+        scope = bind.begin_nested()
+    else:
+        scope = bind.begin()
+    with scope:
+        for statement in statements:
+            bind.execute(sa.text(statement))
--- a/surfsense_backend/alembic/versions/149_add_gateway_tables.py
+++ b/surfsense_backend/alembic/versions/149_add_gateway_tables.py
@ -0,0 +1,667 @@
+"""add external chat surface tables
+
+Revision ID: 149
+Revises: 148
+Create Date: 2026-05-27
+
+Adds the lean external chat surface schema:
+
+* external_chat_accounts
+* external_chat_bindings
+* external_chat_inbound_events
+
+External chat surfaces store Telegram-originated conversations in the existing
+chat tables. This migration adds ``source`` to ``new_chat_threads`` and
+``new_chat_messages`` as UI metadata while publishing all chat-message sources
+through Zero so a future SurfSense UI layer can render external chats. External
+chat adapter tables are served through REST in v1, so they are intentionally not
+added to ``zero_publication``.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+# Alembic revision identifiers.
+revision: str = "149"
+down_revision: str | None = "148"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Publication that is re-emitted at the end of upgrade() / start of downgrade().
+PUBLICATION_NAME = "zero_publication"
+
+# Columns published for ``documents``; ``"_0_version"`` is appended at runtime
+# when the table has that column.
+DOCUMENT_COLS = [
+    "id",
+    "title",
+    "document_type",
+    "search_space_id",
+    "folder_id",
+    "created_by_id",
+    "status",
+    "created_at",
+    "updated_at",
+]
+
+# Columns published for the "user" table; ``"_0_version"`` is appended at
+# runtime when the table has that column.
+USER_COLS = [
+    "id",
+    "pages_limit",
+    "pages_used",
+    "premium_credit_micros_limit",
+    "premium_credit_micros_used",
+]
+
+# Columns published for ``automation_runs``.
+AUTOMATION_RUN_COLS = [
+    "id",
+    "automation_id",
+    "trigger_id",
+    "status",
+    "step_results",
+    "started_at",
+    "finished_at",
+    "created_at",
+]
+
+
+def _has_zero_version(conn, table: str) -> bool:
+    """Tell whether a ``_0_version`` column exists on a table named ``table``.
+
+    Unlike the other existence helpers in this module, the query matches on
+    the table name alone and does not filter by schema.
+    """
+    query = sa.text(
+        "SELECT 1 FROM information_schema.columns "
+        "WHERE table_name = :tbl AND column_name = '_0_version'"
+    )
+    hit = conn.execute(query, {"tbl": table}).fetchone()
+    return hit is not None
+
+
+def _cols(columns: list[str]) -> str:
+    """Render column names as a comma-and-space separated SQL list."""
+    separator = ", "
+    return separator.join(columns)
+
+
+def _table_exists(conn, table: str) -> bool:
+    """Return True when ``table`` is present in the current schema."""
+    lookup = sa.text(
+        "SELECT 1 FROM information_schema.tables "
+        "WHERE table_schema = current_schema() AND table_name = :tbl"
+    )
+    match = conn.execute(lookup, {"tbl": table}).fetchone()
+    return match is not None
+
+
+def _column_exists(conn, table: str, column: str) -> bool:
+    """Return True when ``table`` in the current schema has ``column``."""
+    lookup = sa.text(
+        "SELECT 1 FROM information_schema.columns "
+        "WHERE table_schema = current_schema() "
+        "AND table_name = :tbl AND column_name = :col"
+    )
+    bound = {"tbl": table, "col": column}
+    match = conn.execute(lookup, bound).fetchone()
+    return match is not None
+
+
+def _index_exists(conn, index_name: str) -> bool:
+    """Return True when ``pg_indexes`` lists ``index_name`` in the current schema."""
+    lookup = sa.text(
+        "SELECT 1 FROM pg_indexes "
+        "WHERE schemaname = current_schema() AND indexname = :name"
+    )
+    match = conn.execute(lookup, {"name": index_name}).fetchone()
+    return match is not None
+
+
+def _constraint_exists(conn, table: str, constraint_name: str) -> bool:
+    """Return True when ``table`` in the current schema has ``constraint_name``."""
+    lookup = sa.text(
+        "SELECT 1 FROM information_schema.table_constraints "
+        "WHERE table_schema = current_schema() "
+        "AND table_name = :tbl AND constraint_name = :name"
+    )
+    bound = {"tbl": table, "name": constraint_name}
+    match = conn.execute(lookup, bound).fetchone()
+    return match is not None
+
+
+def _drop_index_if_exists(index_name: str, table_name: str) -> None:
+    """Drop ``index_name`` from ``table_name``; do nothing when it is absent."""
+    bind = op.get_bind()
+    if not _index_exists(bind, index_name):
+        return
+    op.drop_index(index_name, table_name=table_name)
+
+
+def _drop_column_if_exists(table_name: str, column_name: str) -> None:
+    """Drop ``column_name`` from ``table_name``; do nothing when it is absent."""
+    bind = op.get_bind()
+    if not _column_exists(bind, table_name, column_name):
+        return
+    op.drop_column(table_name, column_name)
+
+
+def _build_set_table_ddl(
+    *, documents_has_zero_ver: bool, user_has_zero_ver: bool
+) -> str:
+    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for revision 149.
+
+    ``documents``, ``"user"`` and ``automation_runs`` carry explicit column
+    lists; every other table is published whole. The quoted ``"_0_version"``
+    column joins the ``documents`` / ``"user"`` lists only when the matching
+    flag is true.
+    """
+    zero_version = ['"_0_version"']
+    document_columns = DOCUMENT_COLS + (
+        zero_version if documents_has_zero_ver else []
+    )
+    user_columns = USER_COLS + (zero_version if user_has_zero_ver else [])
+
+    published = [
+        "notifications",
+        f"documents ({_cols(document_columns)})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({_cols(user_columns)})',
+        f"automation_runs ({_cols(AUTOMATION_RUN_COLS)})",
+    ]
+    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(published)
+
+
+def _create_enum(name: str, values: tuple[str, ...]) -> postgresql.ENUM:
+    """Create the PostgreSQL enum ``name`` if missing and return a column type.
+
+    The returned ``ENUM`` has ``create_type=False`` so that using it in
+    ``op.create_table`` does not try to create the type a second time.
+    """
+    postgresql.ENUM(*values, name=name).create(op.get_bind(), checkfirst=True)
+    return postgresql.ENUM(*values, name=name, create_type=False)
+
+
+def upgrade() -> None:
+    """Create the external chat schema and tag chat rows with a ``source``.
+
+    In order:
+
+    1. Create the seven ``external_chat_*`` enum types (``checkfirst``).
+    2. Create ``external_chat_accounts``, ``external_chat_bindings`` and
+       ``external_chat_inbound_events`` when missing, plus their indexes
+       (``if_not_exists``).
+    3. Add ``source`` / ``external_chat_binding_id`` to ``new_chat_threads``
+       and ``source`` / ``platform_metadata`` to ``new_chat_messages`` when
+       missing, with their foreign key and indexes.
+    4. Set ``REPLICA IDENTITY FULL`` on ``new_chat_messages``.
+    5. When ``zero_publication`` exists, re-emit its table list between two
+       COMMENT statements inside one transaction / SAVEPOINT.
+    """
+    conn = op.get_bind()
+    external_chat_platform_enum = _create_enum(
+        "external_chat_platform", ("telegram", "whatsapp", "signal")
+    )
+    external_chat_account_mode_enum = _create_enum(
+        "external_chat_account_mode", ("cloud_shared", "self_host_byo")
+    )
+    external_chat_health_status_enum = _create_enum(
+        "external_chat_health_status", ("unknown", "ok", "failing")
+    )
+    external_chat_binding_state_enum = _create_enum(
+        "external_chat_binding_state", ("pending", "bound", "revoked", "suspended")
+    )
+    external_chat_peer_kind_enum = _create_enum(
+        "external_chat_peer_kind", ("direct", "group", "channel", "unknown")
+    )
+    external_chat_event_kind_enum = _create_enum(
+        "external_chat_event_kind",
+        ("message", "edited_message", "callback_query", "other"),
+    )
+    external_chat_event_status_enum = _create_enum(
+        "external_chat_event_status",
+        ("received", "processing", "processed", "ignored", "failed"),
+    )
+
+    # --- external_chat_accounts -------------------------------------------
+    if not _table_exists(conn, "external_chat_accounts"):
+        op.create_table(
+            "external_chat_accounts",
+            sa.Column("id", sa.BigInteger(), primary_key=True),
+            sa.Column("platform", external_chat_platform_enum, nullable=False),
+            sa.Column("mode", external_chat_account_mode_enum, nullable=False),
+            sa.Column("owner_user_id", postgresql.UUID(as_uuid=True), nullable=True),
+            sa.Column("owner_search_space_id", sa.Integer(), nullable=True),
+            sa.Column(
+                "is_system_account",
+                sa.Boolean(),
+                nullable=False,
+                server_default="false",
+            ),
+            sa.Column("encrypted_credentials", sa.Text(), nullable=True),
+            sa.Column("bot_username", sa.String(255), nullable=True),
+            sa.Column("webhook_secret", sa.String(64), nullable=True),
+            sa.Column(
+                "cursor_state",
+                postgresql.JSONB(astext_type=sa.Text()),
+                nullable=False,
+                server_default=sa.text("'{}'::jsonb"),
+            ),
+            sa.Column(
+                "health_status",
+                external_chat_health_status_enum,
+                nullable=False,
+                server_default="unknown",
+            ),
+            sa.Column(
+                "last_health_check_at", sa.TIMESTAMP(timezone=True), nullable=True
+            ),
+            sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
+            sa.Column("suspended_reason", sa.Text(), nullable=True),
+            sa.Column(
+                "created_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            sa.Column(
+                "updated_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            # System accounts have no owner; every other account must have one.
+            sa.CheckConstraint(
+                "(is_system_account = true AND owner_user_id IS NULL) OR "
+                "(is_system_account = false AND owner_user_id IS NOT NULL)",
+                name="ck_external_chat_accounts_owner_shape",
+            ),
+            sa.ForeignKeyConstraint(["owner_user_id"], ["user.id"], ondelete="CASCADE"),
+            sa.ForeignKeyConstraint(
+                ["owner_search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
+            ),
+        )
+    # One non-system account per (owner, platform).
+    op.create_index(
+        "uq_external_chat_accounts_owner_platform",
+        "external_chat_accounts",
+        ["owner_user_id", "platform"],
+        unique=True,
+        postgresql_where=sa.text("is_system_account = false"),
+        if_not_exists=True,
+    )
+    # One system account per platform.
+    op.create_index(
+        "uq_external_chat_accounts_system_platform",
+        "external_chat_accounts",
+        ["platform"],
+        unique=True,
+        postgresql_where=sa.text("is_system_account = true"),
+        if_not_exists=True,
+    )
+    # Webhook secrets are unique among the rows that have one.
+    op.create_index(
+        "uq_external_chat_accounts_webhook_secret",
+        "external_chat_accounts",
+        ["webhook_secret"],
+        unique=True,
+        postgresql_where=sa.text("webhook_secret IS NOT NULL"),
+        if_not_exists=True,
+    )
+
+    # --- external_chat_bindings -------------------------------------------
+    if not _table_exists(conn, "external_chat_bindings"):
+        op.create_table(
+            "external_chat_bindings",
+            sa.Column("id", sa.BigInteger(), primary_key=True),
+            sa.Column("account_id", sa.BigInteger(), nullable=False),
+            sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
+            sa.Column("search_space_id", sa.Integer(), nullable=False),
+            sa.Column(
+                "state",
+                external_chat_binding_state_enum,
+                nullable=False,
+                server_default="pending",
+            ),
+            sa.Column("pairing_code", sa.Text(), nullable=True),
+            sa.Column(
+                "pairing_code_expires_at", sa.TIMESTAMP(timezone=True), nullable=True
+            ),
+            sa.Column("external_peer_id", sa.Text(), nullable=True),
+            sa.Column(
+                "external_peer_kind",
+                external_chat_peer_kind_enum,
+                nullable=False,
+                server_default="unknown",
+            ),
+            sa.Column(
+                "external_thread_id",
+                sa.Text(),
+                nullable=True,
+                comment="Reserved for Telegram message_thread_id when group/forum support lands.",
+            ),
+            sa.Column("external_display_name", sa.Text(), nullable=True),
+            sa.Column("external_username", sa.Text(), nullable=True),
+            sa.Column(
+                "external_metadata",
+                postgresql.JSONB(astext_type=sa.Text()),
+                nullable=False,
+                server_default=sa.text("'{}'::jsonb"),
+            ),
+            sa.Column("new_chat_thread_id", sa.Integer(), nullable=True),
+            sa.Column("revoked_at", sa.TIMESTAMP(timezone=True), nullable=True),
+            sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
+            sa.Column("suspended_reason", sa.Text(), nullable=True),
+            sa.Column(
+                "created_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            sa.Column(
+                "updated_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            sa.ForeignKeyConstraint(
+                ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
+            ),
+            sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+            sa.ForeignKeyConstraint(
+                ["search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
+            ),
+            sa.ForeignKeyConstraint(
+                ["new_chat_thread_id"], ["new_chat_threads.id"], ondelete="SET NULL"
+            ),
+        )
+    # At most one bound/suspended binding per (account, external peer).
+    op.create_index(
+        "uq_external_chat_bindings_account_peer_active",
+        "external_chat_bindings",
+        ["account_id", "external_peer_id"],
+        unique=True,
+        postgresql_where=sa.text(
+            "state IN ('bound', 'suspended') AND external_peer_id IS NOT NULL"
+        ),
+        if_not_exists=True,
+    )
+    # Pairing codes are unique among pending bindings.
+    op.create_index(
+        "uq_external_chat_bindings_pairing_code_pending",
+        "external_chat_bindings",
+        ["pairing_code"],
+        unique=True,
+        postgresql_where=sa.text("state = 'pending'"),
+        if_not_exists=True,
+    )
+    op.create_index(
+        "ix_external_chat_bindings_user_state",
+        "external_chat_bindings",
+        ["user_id", "state"],
+        if_not_exists=True,
+    )
+    op.create_index(
+        "ix_external_chat_bindings_search_space_state",
+        "external_chat_bindings",
+        ["search_space_id", "state"],
+        if_not_exists=True,
+    )
+
+    # --- external_chat_inbound_events -------------------------------------
+    if not _table_exists(conn, "external_chat_inbound_events"):
+        op.create_table(
+            "external_chat_inbound_events",
+            sa.Column("id", sa.BigInteger(), primary_key=True),
+            sa.Column("account_id", sa.BigInteger(), nullable=False),
+            sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
+            sa.Column("platform", external_chat_platform_enum, nullable=False),
+            sa.Column("event_dedupe_key", sa.Text(), nullable=False),
+            sa.Column("external_event_id", sa.Text(), nullable=True),
+            sa.Column("external_message_id", sa.Text(), nullable=True),
+            sa.Column("event_kind", external_chat_event_kind_enum, nullable=False),
+            sa.Column(
+                "raw_payload",
+                postgresql.JSONB(astext_type=sa.Text()),
+                nullable=True,
+            ),
+            sa.Column("request_id", sa.String(64), nullable=True),
+            sa.Column(
+                "status",
+                external_chat_event_status_enum,
+                nullable=False,
+                server_default="received",
+            ),
+            sa.Column(
+                "attempt_count", sa.Integer(), nullable=False, server_default="0"
+            ),
+            sa.Column("last_error", sa.Text(), nullable=True),
+            sa.Column(
+                "received_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            sa.Column("processed_at", sa.TIMESTAMP(timezone=True), nullable=True),
+            sa.Column(
+                "created_at",
+                sa.TIMESTAMP(timezone=True),
+                nullable=False,
+                server_default=sa.text("(now() AT TIME ZONE 'utc')"),
+            ),
+            sa.ForeignKeyConstraint(
+                ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
+            ),
+            sa.ForeignKeyConstraint(
+                ["external_chat_binding_id"],
+                ["external_chat_bindings.id"],
+                ondelete="SET NULL",
+            ),
+            # A dedupe key may appear only once per account.
+            sa.UniqueConstraint(
+                "account_id",
+                "event_dedupe_key",
+                name="uq_external_chat_inbound_account_dedupe_key",
+            ),
+        )
+    op.create_index(
+        "ix_external_chat_inbound_status_received_at",
+        "external_chat_inbound_events",
+        ["status", "received_at"],
+        if_not_exists=True,
+    )
+    op.create_index(
+        "ix_external_chat_inbound_binding_received_at",
+        "external_chat_inbound_events",
+        ["external_chat_binding_id", "received_at"],
+        if_not_exists=True,
+    )
+    op.create_index(
+        "ix_external_chat_inbound_request_id",
+        "external_chat_inbound_events",
+        ["request_id"],
+        postgresql_where=sa.text("request_id IS NOT NULL"),
+        if_not_exists=True,
+    )
+
+    # --- new_chat_threads: source + link back to the binding ---------------
+    if not _column_exists(conn, "new_chat_threads", "source"):
+        op.add_column(
+            "new_chat_threads",
+            sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
+        )
+    # Runs unconditionally, so a pre-existing ``source`` column is also
+    # converted to TEXT.
+    op.alter_column("new_chat_threads", "source", type_=sa.Text())
+    if not _column_exists(conn, "new_chat_threads", "external_chat_binding_id"):
+        op.add_column(
+            "new_chat_threads",
+            sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
+        )
+    if not _constraint_exists(
+        conn,
+        "new_chat_threads",
+        "fk_new_chat_threads_external_chat_external_chat_binding_id",
+    ):
+        op.create_foreign_key(
+            "fk_new_chat_threads_external_chat_external_chat_binding_id",
+            "new_chat_threads",
+            "external_chat_bindings",
+            ["external_chat_binding_id"],
+            ["id"],
+            ondelete="SET NULL",
+        )
+    op.create_index(
+        "ix_new_chat_threads_source", "new_chat_threads", ["source"], if_not_exists=True
+    )
+    op.create_index(
+        "ix_new_chat_threads_external_chat_binding_id",
+        "new_chat_threads",
+        ["external_chat_binding_id"],
+        if_not_exists=True,
+    )
+
+    # --- new_chat_messages: source + platform metadata ---------------------
+    if not _column_exists(conn, "new_chat_messages", "source"):
+        op.add_column(
+            "new_chat_messages",
+            sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
+        )
+    # Runs unconditionally, so a pre-existing ``source`` column is also
+    # converted to TEXT.
+    op.alter_column("new_chat_messages", "source", type_=sa.Text())
+    if not _column_exists(conn, "new_chat_messages", "platform_metadata"):
+        op.add_column(
+            "new_chat_messages",
+            sa.Column(
+                "platform_metadata",
+                postgresql.JSONB(astext_type=sa.Text()),
+                nullable=True,
+            ),
+        )
+    op.create_index(
+        "ix_new_chat_messages_source",
+        "new_chat_messages",
+        ["source"],
+        if_not_exists=True,
+    )
+    # An inbound external message may be stored only once per thread,
+    # keyed by (platform, external_message_id) from platform_metadata.
+    op.create_index(
+        "uq_new_chat_messages_inbound_platform",
+        "new_chat_messages",
+        [
+            "thread_id",
+            sa.text("(platform_metadata->>'platform')"),
+            sa.text("(platform_metadata->>'external_message_id')"),
+        ],
+        unique=True,
+        postgresql_where=sa.text(
+            "platform_metadata IS NOT NULL "
+            "AND platform_metadata->>'direction' = 'inbound'"
+        ),
+        if_not_exists=True,
+    )
+    op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY FULL")
+
+    # --- zero_publication re-emit -------------------------------------------
+    exists = conn.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if exists:
+        documents_has_zero_ver = _has_zero_version(conn, "documents")
+        user_has_zero_ver = _has_zero_version(conn, "user")
+        # COMMENT / ALTER / COMMENT run in one transaction (a SAVEPOINT when
+        # already inside one). The bookend labels carry this revision's
+        # number, 149, matching the naming used by migration 148.
+        tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
+        with tx:
+            conn.execute(
+                sa.text(
+                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-external-chat'"
+                )
+            )
+            conn.execute(
+                sa.text(
+                    _build_set_table_ddl(
+                        documents_has_zero_ver=documents_has_zero_ver,
+                        user_has_zero_ver=user_has_zero_ver,
+                    )
+                )
+            )
+            conn.execute(
+                sa.text(
+                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-external-chat'"
+                )
+            )
+
+
+def downgrade() -> None:
+    """Undo revision 149.
+
+    When ``zero_publication`` exists its table list is re-emitted first,
+    between two COMMENT statements in one transaction / SAVEPOINT. Then the
+    columns, indexes and foreign key added to the chat tables are dropped,
+    followed by the three ``external_chat_*`` tables (events, bindings,
+    accounts) and finally the enum types. Every drop is guarded by an
+    existence check.
+    """
+    conn = op.get_bind()
+    exists = conn.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if exists:
+        # Restore the publication shape from migration 148. Revision 149 does
+        # not change the published table list, so the upgrade builder yields
+        # exactly that statement.
+        ddl = _build_set_table_ddl(
+            documents_has_zero_ver=_has_zero_version(conn, "documents"),
+            user_has_zero_ver=_has_zero_version(conn, "user"),
+        )
+        tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
+        with tx:
+            conn.execute(
+                sa.text(
+                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-downgrade'"
+                )
+            )
+            conn.execute(sa.text(ddl))
+            conn.execute(
+                sa.text(
+                    f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-downgrade'"
+                )
+            )
+
+    # new_chat_messages: reset replica identity, then drop indexes and columns.
+    if _column_exists(conn, "new_chat_messages", "source"):
+        op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY DEFAULT")
+    _drop_index_if_exists("uq_new_chat_messages_inbound_platform", "new_chat_messages")
+    _drop_index_if_exists("ix_new_chat_messages_source", "new_chat_messages")
+    _drop_column_if_exists("new_chat_messages", "platform_metadata")
+    _drop_column_if_exists("new_chat_messages", "source")
+
+    # new_chat_threads: the foreign key to external_chat_bindings goes before
+    # that table is dropped further down.
+    _drop_index_if_exists(
+        "ix_new_chat_threads_external_chat_binding_id", "new_chat_threads"
+    )
+    _drop_index_if_exists("ix_new_chat_threads_source", "new_chat_threads")
+    if _constraint_exists(
+        conn,
+        "new_chat_threads",
+        "fk_new_chat_threads_external_chat_external_chat_binding_id",
+    ):
+        op.drop_constraint(
+            "fk_new_chat_threads_external_chat_external_chat_binding_id",
+            "new_chat_threads",
+            type_="foreignkey",
+        )
+    _drop_column_if_exists("new_chat_threads", "external_chat_binding_id")
+    _drop_column_if_exists("new_chat_threads", "source")
+
+    # Tables are dropped children-first: events -> bindings -> accounts.
+    _drop_index_if_exists(
+        "ix_external_chat_inbound_binding_received_at", "external_chat_inbound_events"
+    )
+    _drop_index_if_exists(
+        "ix_external_chat_inbound_request_id", "external_chat_inbound_events"
+    )
+    _drop_index_if_exists(
+        "ix_external_chat_inbound_status_received_at", "external_chat_inbound_events"
+    )
+    if _table_exists(conn, "external_chat_inbound_events"):
+        op.drop_table("external_chat_inbound_events")
+
+    _drop_index_if_exists(
+        "ix_external_chat_bindings_search_space_state",
+        "external_chat_bindings",
+    )
+    _drop_index_if_exists(
+        "ix_external_chat_bindings_user_state", "external_chat_bindings"
+    )
+    _drop_index_if_exists(
+        "uq_external_chat_bindings_pairing_code_pending",
+        "external_chat_bindings",
+    )
+    _drop_index_if_exists(
+        "uq_external_chat_bindings_account_peer_active",
+        "external_chat_bindings",
+    )
+    if _table_exists(conn, "external_chat_bindings"):
+        op.drop_table("external_chat_bindings")
+
+    _drop_index_if_exists(
+        "uq_external_chat_accounts_system_platform", "external_chat_accounts"
+    )
+    _drop_index_if_exists(
+        "uq_external_chat_accounts_owner_platform", "external_chat_accounts"
+    )
+    _drop_index_if_exists(
+        "uq_external_chat_accounts_webhook_secret", "external_chat_accounts"
+    )
+    if _table_exists(conn, "external_chat_accounts"):
+        op.drop_table("external_chat_accounts")
+
+    # Enum types go last, once no table column references them.
+    for enum_name in (
+        "external_chat_event_status",
+        "external_chat_event_kind",
+        "external_chat_peer_kind",
+        "external_chat_binding_state",
+        "external_chat_health_status",
+        "external_chat_account_mode",
+        "external_chat_platform",
+    ):
+        postgresql.ENUM(name=enum_name).drop(conn, checkfirst=True)
--- a/surfsense_backend/alembic/versions/150_add_slack_gateway_platform.py
+++ b/surfsense_backend/alembic/versions/150_add_slack_gateway_platform.py
@ -0,0 +1,102 @@
+"""add slack gateway platform
+
+Revision ID: 150
+Revises: 149
+Create Date: 2026-05-31
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# Alembic revision identifiers.
+revision: str = "150"
+down_revision: str | None = "149"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def _enum_value_exists(enum_name: str, value: str) -> bool:
+    """Return True when enum type ``enum_name`` already has label ``value``.
+
+    Joins ``pg_enum`` to ``pg_type`` on the current migration connection and
+    matches the type by name.
+    """
+    lookup = sa.text(
+        "SELECT 1 FROM pg_enum e "
+        "JOIN pg_type t ON t.oid = e.enumtypid "
+        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
+    )
+    bound = {"enum_name": enum_name, "value": value}
+    match = op.get_bind().execute(lookup, bound).fetchone()
+    return match is not None
+
+
+def _index_exists(index_name: str) -> bool:
+    """Return True when ``pg_indexes`` lists ``index_name`` in the current schema."""
+    lookup = sa.text(
+        "SELECT 1 FROM pg_indexes "
+        "WHERE schemaname = current_schema() AND indexname = :index_name"
+    )
+    match = op.get_bind().execute(lookup, {"index_name": index_name}).fetchone()
+    return match is not None
+
+
+def upgrade() -> None:
+    """Add the ``slack`` platform and split the system-account uniqueness rule.
+
+    The ``slack`` label is added to ``external_chat_platform`` when missing.
+    The one-system-account-per-platform index is rebuilt so that it only
+    covers rows whose ``cursor_state`` has no ``team_id`` key, and a second
+    unique index covers ``(platform, team_id)`` for rows that do have one.
+    """
+    accounts_table = "external_chat_accounts"
+    system_platform_index = "uq_external_chat_accounts_system_platform"
+
+    slack_present = _enum_value_exists("external_chat_platform", "slack")
+    if not slack_present:
+        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'slack'")
+
+    # Drop first so the index is recreated with the new predicate.
+    if _index_exists(system_platform_index):
+        op.drop_index(system_platform_index, table_name=accounts_table)
+
+    without_team = sa.text(
+        "is_system_account = true AND NOT (cursor_state ? 'team_id')"
+    )
+    op.create_index(
+        system_platform_index,
+        accounts_table,
+        ["platform"],
+        unique=True,
+        postgresql_where=without_team,
+        if_not_exists=True,
+    )
+
+    with_team = sa.text("is_system_account = true AND cursor_state ? 'team_id'")
+    op.create_index(
+        "uq_external_chat_accounts_slack_team",
+        accounts_table,
+        ["platform", sa.text("(cursor_state ->> 'team_id')")],
+        unique=True,
+        postgresql_where=with_team,
+        if_not_exists=True,
+    )
+
+
+def downgrade() -> None:
+    """Restore the revision-149 system-account index.
+
+    Drops the Slack team index and the narrowed system-platform index when
+    present, then recreates the system-platform index with the original
+    ``is_system_account = true`` predicate.
+    """
+    accounts_table = "external_chat_accounts"
+    system_platform_index = "uq_external_chat_accounts_system_platform"
+
+    for index_name in (
+        "uq_external_chat_accounts_slack_team",
+        system_platform_index,
+    ):
+        if _index_exists(index_name):
+            op.drop_index(index_name, table_name=accounts_table)
+
+    op.create_index(
+        system_platform_index,
+        accounts_table,
+        ["platform"],
+        unique=True,
+        postgresql_where=sa.text("is_system_account = true"),
+        if_not_exists=True,
+    )
+    # PostgreSQL enum values are intentionally not removed on downgrade.
--- a/surfsense_backend/alembic/versions/151_add_discord_gateway_platform.py
+++ b/surfsense_backend/alembic/versions/151_add_discord_gateway_platform.py
@ -0,0 +1,106 @@
+"""add discord gateway platform
+
+Revision ID: 151
+Revises: 150
+Create Date: 2026-06-01
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# Alembic revision identifiers: this migration is "151" and follows "150".
+revision: str = "151"
+down_revision: str | None = "150"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def _enum_value_exists(enum_name: str, value: str) -> bool:
+    """Return True when ``value`` is already a label of enum ``enum_name``.
+
+    The label is looked up in ``pg_enum`` joined to ``pg_type`` by type name.
+    """
+    lookup = sa.text(
+        "SELECT 1 FROM pg_enum e "
+        "JOIN pg_type t ON t.oid = e.enumtypid "
+        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
+    )
+    params = {"enum_name": enum_name, "value": value}
+    row = op.get_bind().execute(lookup, params).fetchone()
+    return row is not None
+
+
+def _index_exists(index_name: str) -> bool:
+    """Return True when ``index_name`` exists in the current schema.
+
+    The lookup goes through ``pg_indexes`` filtered on ``current_schema()``.
+    """
+    lookup = sa.text(
+        "SELECT 1 FROM pg_indexes "
+        "WHERE schemaname = current_schema() AND indexname = :index_name"
+    )
+    row = op.get_bind().execute(lookup, {"index_name": index_name}).fetchone()
+    return row is not None
+
+
+def upgrade() -> None:
+    """Add the ``discord`` platform value and rebuild the account indexes.
+
+    ``uq_external_chat_accounts_system_platform`` is recreated so it skips
+    rows whose ``cursor_state`` carries a ``team_id`` or ``guild_id`` key, and
+    ``uq_external_chat_accounts_discord_guild`` enforces uniqueness on
+    ``(platform, cursor_state ->> 'guild_id')`` for rows that have a guild.
+    """
+    table = "external_chat_accounts"
+    platform_index = "uq_external_chat_accounts_system_platform"
+
+    discord_known = _enum_value_exists("external_chat_platform", "discord")
+    if not discord_known:
+        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'discord'")
+
+    # Drop first so the index is recreated with the updated predicate.
+    if _index_exists(platform_index):
+        op.drop_index(platform_index, table_name=table)
+
+    unscoped_accounts = sa.text(
+        "is_system_account = true "
+        "AND NOT (cursor_state ? 'team_id') "
+        "AND NOT (cursor_state ? 'guild_id')"
+    )
+    op.create_index(
+        platform_index,
+        table,
+        ["platform"],
+        unique=True,
+        postgresql_where=unscoped_accounts,
+        if_not_exists=True,
+    )
+
+    guild_key = sa.text("(cursor_state ->> 'guild_id')")
+    guild_accounts = sa.text(
+        "is_system_account = true AND cursor_state ? 'guild_id'"
+    )
+    op.create_index(
+        "uq_external_chat_accounts_discord_guild",
+        table,
+        ["platform", guild_key],
+        unique=True,
+        postgresql_where=guild_accounts,
+        if_not_exists=True,
+    )
+
+
+def downgrade() -> None:
+    """Drop the Discord guild index and restore the previous platform index.
+
+    ``uq_external_chat_accounts_system_platform`` is recreated with the
+    predicate that only excludes rows carrying a ``team_id`` key.
+    """
+    table = "external_chat_accounts"
+    platform_index = "uq_external_chat_accounts_system_platform"
+
+    # Drop the guild-scoped index first, then the platform index.
+    for index_name in ("uq_external_chat_accounts_discord_guild", platform_index):
+        if _index_exists(index_name):
+            op.drop_index(index_name, table_name=table)
+
+    without_team = sa.text(
+        "is_system_account = true AND NOT (cursor_state ? 'team_id')"
+    )
+    op.create_index(
+        platform_index,
+        table,
+        ["platform"],
+        unique=True,
+        postgresql_where=without_team,
+        if_not_exists=True,
+    )
+    # PostgreSQL enum values are intentionally not removed on downgrade.
--- a/surfsense_backend/alembic/versions/152_add_document_files.py
+++ b/surfsense_backend/alembic/versions/152_add_document_files.py
@ -0,0 +1,85 @@
+"""add document_files table for stored original uploads
+
+Revision ID: 152
+Revises: 151
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# Alembic revision identifiers: this migration is "152" and follows "151".
+revision: str = "152"
+down_revision: str | None = "151"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``document_file_kind`` enum, the table, and its indexes.
+
+    Every statement is guarded with ``IF NOT EXISTS`` (or an equivalent
+    ``pg_type`` check for the enum).
+    """
+    # The enum type must precede the table that references it.
+    create_enum_sql = """
+        DO $$
+        BEGIN
+            IF NOT EXISTS (
+                SELECT 1 FROM pg_type WHERE typname = 'document_file_kind'
+            ) THEN
+                CREATE TYPE document_file_kind AS ENUM (
+                    'ORIGINAL', 'REDACTED', 'FILLED_FORM'
+                );
+            END IF;
+        END
+        $$;
+        """
+    op.execute(create_enum_sql)
+
+    create_table_sql = """
+        CREATE TABLE IF NOT EXISTS document_files (
+            id SERIAL PRIMARY KEY,
+            document_id INTEGER NOT NULL
+                REFERENCES documents(id) ON DELETE CASCADE,
+            search_space_id INTEGER NOT NULL
+                REFERENCES searchspaces(id) ON DELETE CASCADE,
+            kind document_file_kind NOT NULL DEFAULT 'ORIGINAL',
+            storage_backend VARCHAR(32) NOT NULL,
+            storage_key TEXT NOT NULL,
+            original_filename TEXT NOT NULL,
+            mime_type TEXT,
+            size_bytes BIGINT NOT NULL,
+            checksum_sha256 VARCHAR(64),
+            created_by_id UUID
+                REFERENCES "user"(id) ON DELETE SET NULL,
+            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+        );
+        """
+    op.execute(create_table_sql)
+
+    # One single-column index per listed column, created in this order.
+    indexed_columns = (
+        "document_id",
+        "search_space_id",
+        "kind",
+        "created_by_id",
+        "created_at",
+    )
+    for column in indexed_columns:
+        op.execute(
+            f"CREATE INDEX IF NOT EXISTS ix_document_files_{column} "
+            f"ON document_files({column});"
+        )
+
+
+def downgrade() -> None:
+    """Drop the ``document_files`` indexes, then the table, then the enum."""
+    # Indexes are dropped in the reverse of their creation order.
+    indexed_columns = (
+        "created_at",
+        "created_by_id",
+        "kind",
+        "search_space_id",
+        "document_id",
+    )
+    for column in indexed_columns:
+        op.execute(f"DROP INDEX IF EXISTS ix_document_files_{column};")
+
+    # The table goes before the enum type its ``kind`` column uses.
+    op.execute("DROP TABLE IF EXISTS document_files;")
+    op.execute("DROP TYPE IF EXISTS document_file_kind;")
--- a/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
+++ b/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
@ -0,0 +1,121 @@
+"""restore automation_runs to zero_publication
+
+Migration 149's ``SET TABLE`` dropped ``automation_runs`` (added in 148),
+breaking the dashboard live run ticker with a SchemaVersionNotSupported
+reload loop. Re-emit the publication with ``automation_runs`` using the
+``COMMENT`` bookend pattern so zero-cache fires its schema-change hook.
+
+Revision ID: 153
+Revises: 152
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# Alembic revision identifiers: this migration is "153" and follows "152".
+revision: str = "153"
+down_revision: str | None = "152"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Name of the publication rewritten by this migration.
+PUBLICATION_NAME = "zero_publication"
+
+# Columns of ``documents`` listed in the publication's SET TABLE clause.
+DOCUMENT_COLS = [
+    "id",
+    "title",
+    "document_type",
+    "search_space_id",
+    "folder_id",
+    "created_by_id",
+    "status",
+    "created_at",
+    "updated_at",
+]
+
+# Columns of ``"user"`` listed in the publication's SET TABLE clause.
+USER_COLS = [
+    "id",
+    "pages_limit",
+    "pages_used",
+    "premium_credit_micros_limit",
+    "premium_credit_micros_used",
+]
+
+# Columns of ``automation_runs`` listed when that table is included.
+AUTOMATION_RUN_COLS = [
+    "id",
+    "automation_id",
+    "trigger_id",
+    "status",
+    "step_results",
+    "started_at",
+    "finished_at",
+    "created_at",
+]
+
+
+def _has_zero_version(conn, table: str) -> bool:
+    """Return True when ``table`` has a ``_0_version`` column.
+
+    The lookup is limited to the current schema so that a same-named table
+    in another schema cannot produce a false positive.
+
+    Args:
+        conn: Connection used to run the catalog query.
+        table: Unqualified table name.
+
+    Returns:
+        True if a matching column row exists, False otherwise.
+    """
+    return (
+        conn.execute(
+            sa.text(
+                "SELECT 1 FROM information_schema.columns "
+                "WHERE table_schema = current_schema() "
+                "AND table_name = :tbl AND column_name = '_0_version'"
+            ),
+            {"tbl": table},
+        ).fetchone()
+        is not None
+    )
+
+
+def _set_table_ddl(*, with_automation_runs: bool, conn) -> str:
+    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement.
+
+    ``documents`` and ``"user"`` are published with explicit column lists;
+    ``"_0_version"`` is appended to a list only when that table has the
+    column. ``automation_runs`` is included only when requested.
+
+    Args:
+        with_automation_runs: Whether to append the ``automation_runs`` entry.
+        conn: Connection used for the ``_0_version`` column checks.
+
+    Returns:
+        The complete DDL statement as a string.
+    """
+    document_columns = list(DOCUMENT_COLS)
+    if _has_zero_version(conn, "documents"):
+        document_columns.append('"_0_version"')
+
+    account_columns = list(USER_COLS)
+    if _has_zero_version(conn, "user"):
+        account_columns.append('"_0_version"')
+
+    document_list = ", ".join(document_columns)
+    account_list = ", ".join(account_columns)
+    published = [
+        "notifications",
+        f"documents ({document_list})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({account_list})',
+    ]
+    if with_automation_runs:
+        run_list = ", ".join(AUTOMATION_RUN_COLS)
+        published.append(f"automation_runs ({run_list})")
+
+    table_clause = ", ".join(published)
+    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
+
+
+def _resync(*, with_automation_runs: bool, tag: str) -> None:
+    """Re-emit the publication's table list between two COMMENT statements.
+
+    Does nothing when the publication does not exist. Otherwise the
+    ``pre-<tag>`` comment, the ``SET TABLE`` statement and the ``post-<tag>``
+    comment run inside one transaction (a savepoint when the connection is
+    already in a transaction).
+
+    Args:
+        with_automation_runs: Forwarded to ``_set_table_ddl``.
+        tag: Suffix used in the ``pre-``/``post-`` publication comments.
+    """
+    conn = op.get_bind()
+    publication_row = conn.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if not publication_row:
+        return
+
+    if conn.in_transaction():
+        tx = conn.begin_nested()
+    else:
+        tx = conn.begin()
+
+    with tx:
+        opening = sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'")
+        conn.execute(opening)
+
+        # Built after the opening comment, so its column checks run in between.
+        ddl = _set_table_ddl(with_automation_runs=with_automation_runs, conn=conn)
+        conn.execute(sa.text(ddl))
+
+        closing = sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'")
+        conn.execute(closing)
+
+
+def upgrade() -> None:
+    """Resync the publication with ``automation_runs`` included."""
+    _resync(with_automation_runs=True, tag="153-resync")
+
+
+def downgrade() -> None:
+    """Resync the publication without ``automation_runs``."""
+    _resync(with_automation_runs=False, tag="153-downgrade")
--- a/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
+++ b/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
@ -0,0 +1,147 @@
+"""remove document summary llm settings
+
+Revision ID: 154
+Revises: 153
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# Alembic revision identifiers: this migration is "154" and follows "153".
+revision: str = "154"
+down_revision: str | None = "153"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# Name of the publication rewritten by this migration.
+PUBLICATION_NAME = "zero_publication"
+
+# Columns of ``documents`` listed in the publication's SET TABLE clause.
+DOCUMENT_COLS = [
+    "id",
+    "title",
+    "document_type",
+    "search_space_id",
+    "folder_id",
+    "created_by_id",
+    "status",
+    "created_at",
+    "updated_at",
+]
+
+# Columns of ``"user"`` listed in the publication's SET TABLE clause.
+USER_COLS = [
+    "id",
+    "pages_limit",
+    "pages_used",
+    "premium_credit_micros_limit",
+    "premium_credit_micros_used",
+]
+
+# Columns of ``automation_runs`` listed in the publication's SET TABLE clause.
+AUTOMATION_RUN_COLS = [
+    "id",
+    "automation_id",
+    "trigger_id",
+    "status",
+    "step_results",
+    "started_at",
+    "finished_at",
+    "created_at",
+]
+
+
+def _column_exists(conn, table: str, column: str) -> bool:
+    """Return True when ``table`` has a column named ``column``.
+
+    The lookup is limited to the current schema so that a same-named table
+    in another schema cannot make the migration drop a column that is not
+    there, or skip adding one that is missing.
+
+    Args:
+        conn: Connection used to run the catalog query.
+        table: Unqualified table name.
+        column: Column name to look for.
+
+    Returns:
+        True if a matching column row exists, False otherwise.
+    """
+    return (
+        conn.execute(
+            sa.text(
+                "SELECT 1 FROM information_schema.columns "
+                "WHERE table_schema = current_schema() "
+                "AND table_name = :table AND column_name = :column"
+            ),
+            {"table": table, "column": column},
+        ).fetchone()
+        is not None
+    )
+
+
+def _has_zero_version(conn, table: str) -> bool:
+    """Return True when ``table`` has a ``_0_version`` column."""
+    return _column_exists(conn, table, "_0_version")
+
+
+def _set_table_ddl(conn) -> str:
+    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement.
+
+    ``documents``, ``"user"`` and ``automation_runs`` are published with
+    explicit column lists; ``"_0_version"`` is appended to the first two only
+    when that table has the column.
+
+    Args:
+        conn: Connection used for the ``_0_version`` column checks.
+
+    Returns:
+        The complete DDL statement as a string.
+    """
+    document_columns = list(DOCUMENT_COLS)
+    if _has_zero_version(conn, "documents"):
+        document_columns.append('"_0_version"')
+
+    account_columns = list(USER_COLS)
+    if _has_zero_version(conn, "user"):
+        account_columns.append('"_0_version"')
+
+    document_list = ", ".join(document_columns)
+    account_list = ", ".join(account_columns)
+    run_list = ", ".join(AUTOMATION_RUN_COLS)
+    published = [
+        "notifications",
+        f"documents ({document_list})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({account_list})',
+        f"automation_runs ({run_list})",
+    ]
+    table_clause = ", ".join(published)
+    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
+
+
+def _resync_zero_publication(tag: str) -> None:
+    """Re-emit the publication's table list between two COMMENT statements.
+
+    Does nothing when the publication does not exist. Otherwise the
+    ``pre-<tag>`` comment, the ``SET TABLE`` statement and the ``post-<tag>``
+    comment run inside one transaction (a savepoint when the connection is
+    already in a transaction).
+
+    Args:
+        tag: Suffix used in the ``pre-``/``post-`` publication comments.
+    """
+    conn = op.get_bind()
+    publication_row = conn.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if not publication_row:
+        return
+
+    if conn.in_transaction():
+        tx = conn.begin_nested()
+    else:
+        tx = conn.begin()
+
+    with tx:
+        opening = sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'")
+        conn.execute(opening)
+
+        # Built after the opening comment, so its column checks run in between.
+        ddl = _set_table_ddl(conn)
+        conn.execute(sa.text(ddl))
+
+        closing = sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'")
+        conn.execute(closing)
+
+
+def upgrade() -> None:
+    """Drop the document-summary columns, then resync the publication.
+
+    Each column is dropped only if it is still present.
+    """
+    conn = op.get_bind()
+
+    obsolete_columns = (
+        ("searchspaces", "document_summary_llm_id"),
+        ("search_source_connectors", "enable_summary"),
+    )
+    for table, column in obsolete_columns:
+        if _column_exists(conn, table, column):
+            op.drop_column(table, column)
+
+    _resync_zero_publication("154-summary-removal")
+
+
+def downgrade() -> None:
+    """Re-add the document-summary columns, then resync the publication.
+
+    Each column is added only if it is missing. ``document_summary_llm_id``
+    comes back as a nullable integer with server default ``0``;
+    ``enable_summary`` as a non-null boolean with server default ``false``.
+    """
+    conn = op.get_bind()
+
+    llm_missing = not _column_exists(
+        conn, "searchspaces", "document_summary_llm_id"
+    )
+    if llm_missing:
+        llm_column = sa.Column(
+            "document_summary_llm_id",
+            sa.Integer(),
+            nullable=True,
+            server_default="0",
+        )
+        op.add_column("searchspaces", llm_column)
+
+    summary_missing = not _column_exists(
+        conn, "search_source_connectors", "enable_summary"
+    )
+    if summary_missing:
+        summary_column = sa.Column(
+            "enable_summary",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.text("false"),
+        )
+        op.add_column("search_source_connectors", summary_column)
+
+    _resync_zero_publication("154-summary-removal-downgrade")
--- a/surfsense_backend/alembic/versions/155_reconcile_zero_publication.py
+++ b/surfsense_backend/alembic/versions/155_reconcile_zero_publication.py
@ -0,0 +1,23 @@
+"""reconcile zero_publication from canonical definition
+
+Revision ID: 155
+Revises: 154
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+from app.zero_publication import apply_publication
+
+# Alembic revision identifiers: this migration is "155" and follows "154".
+revision: str = "155"
+down_revision: str | None = "154"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Run ``app.zero_publication.apply_publication`` on the migration bind."""
+    apply_publication(op.get_bind())
+
+
+def downgrade() -> None:
+    """No-op: historical publication shapes are immutable, so nothing is undone."""
--- a/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
+++ b/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
@ -1,557 +0,0 @@
-"""Vision autocomplete agent with scoped filesystem exploration.
-
-Converts the stateless single-shot vision autocomplete into an agent that
-seeds a virtual filesystem from KB search results and lets the vision LLM
-explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
-before generating the final completion.
-
-Performance: KB search and agent graph compilation run in parallel so
-the only sequential latency is KB-search (or agent compile, whichever is
-slower) + the agent's LLM turns.  There is no separate "query extraction"
-LLM call — the window title is used directly as the KB search query.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import re
-import uuid
-from collections.abc import AsyncGenerator
-from typing import Any
-
-from deepagents.graph import BASE_AGENT_PROMPT
-from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
-from langchain.agents import create_agent
-from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, ToolMessage
-
-from app.agents.new_chat.document_xml import build_document_xml
-from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
-from app.agents.new_chat.middleware.knowledge_search import (
-    search_knowledge_base,
-)
-from app.agents.new_chat.path_resolver import (
-    DOCUMENTS_ROOT,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import shielded_async_session
-from app.services.new_streaming_service import VercelStreamingService
-
-try:
-    from deepagents.backends.utils import create_file_data
-except Exception:  # pragma: no cover - defensive
-
-    def create_file_data(content: str) -> dict[str, Any]:
-        return {"content": content.split("\n")}
-
-
-async def _build_autocomplete_filesystem(
-    *,
-    documents: Any,
-    search_space_id: int,
-) -> tuple[dict[str, Any], dict[int, str]]:
-    """Build a ``state['files']``-shaped dict from KB search results.
-
-    This is the autocomplete-specific replacement for the previous
-    ``build_scoped_filesystem`` helper. It uses the canonical path resolver
-    so paths line up with the rest of the system, including collision
-    suffixes for duplicate titles.
-    """
-    files: dict[str, Any] = {}
-    doc_id_to_path: dict[int, str] = {}
-
-    if not documents:
-        return files, doc_id_to_path
-
-    async with shielded_async_session() as session:
-        index = await build_path_index(session, search_space_id)
-
-    for document in documents:
-        if not isinstance(document, dict):
-            continue
-        meta = document.get("document") or {}
-        doc_id = meta.get("id")
-        if not isinstance(doc_id, int):
-            continue
-        title = str(meta.get("title") or "untitled")
-        folder_id = meta.get("folder_id")
-        path = doc_to_virtual_path(
-            doc_id=doc_id, title=title, folder_id=folder_id, index=index
-        )
-        chunk_ids = document.get("matched_chunk_ids") or []
-        try:
-            matched_set = {int(c) for c in chunk_ids}
-        except (TypeError, ValueError):
-            matched_set = set()
-        xml = build_document_xml(document, matched_chunk_ids=matched_set)
-        files[path] = create_file_data(xml)
-        doc_id_to_path[doc_id] = path
-
-    if not files:
-        # Ensure the synthetic /documents folder is visible even when empty.
-        files.setdefault(f"{DOCUMENTS_ROOT}/.placeholder", create_file_data(""))
-
-    return files, doc_id_to_path
-
-
-logger = logging.getLogger(__name__)
-
-KB_TOP_K = 10
-
-# ---------------------------------------------------------------------------
-# System prompt
-# ---------------------------------------------------------------------------
-
-AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
-
-You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
-
-Your job:
-1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
-2. Identify the text area where the user will type.
-3. Generate the text the user most likely wants to write based on the visual context.
-
-You also have access to the user's knowledge base documents via filesystem tools. However:
- ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
- Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
- If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
- Keep your output SHORT — autocomplete should feel like a natural continuation, not an essay.
-
-Key behavior:
- If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
- If the text area already has text, continue it naturally — typically just a sentence or two.
-
-Rules:
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
- Match the tone and formality of the surrounding context.
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
- Do NOT describe the screenshot or explain your reasoning.
- Do NOT cite or reference documents explicitly — just let the knowledge inform your writing naturally.
- If you cannot determine what to write, output an empty JSON array: []
-
-## Output Format
-
-You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion — vary the tone, detail level, or angle.
-
-Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else — no markdown fences, no explanation, no commentary.
-
-Example format:
-["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
-
-## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
-
-All file paths must start with a `/`.
- ls: list files and directories at a given path.
- read_file: read a file from the filesystem.
- write_file: create a temporary file in the session (not persisted).
- edit_file: edit a file in the session (not persisted for /documents/ files).
- glob: find files matching a pattern (e.g., "**/*.xml").
- grep: search for text within files.
-
-## When to Use Filesystem Tools
-
-BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it — do NOT explore the KB.
-
-Only use tools when:
- The user is clearly writing about a specific topic that likely has detailed information in their KB.
- You need a specific fact, name, number, or reference that the screenshot doesn't provide.
-
-When you do use tools, be surgical:
- Check the `ls` output first. If no document title looks relevant, stop — do not read files just to see what's there.
- If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
- Extract only the specific information you need and move on to generating the completion.
-
-## Reading Documents Efficiently
-
-Documents are formatted as XML. Each document contains:
- `<document_metadata>` — title, type, URL, etc.
- `<chunk_index>` — a table of every chunk with its **line range** and a
-  `matched="true"` flag for chunks that matched the search query.
- `<document_content>` — the actual chunks in original document order.
-
-**Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
-chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
-limit=<lines>)` to jump directly to those sections."""
-
-APP_CONTEXT_BLOCK = """
-
-The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
-
-
-def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
-    prompt = AUTOCOMPLETE_SYSTEM_PROMPT
-    if app_name:
-        prompt += APP_CONTEXT_BLOCK.format(app_name=app_name, window_title=window_title)
-    return prompt
-
-
-# ---------------------------------------------------------------------------
-# Pre-compute KB filesystem (runs in parallel with agent compilation)
-# ---------------------------------------------------------------------------
-
-
-class _KBResult:
-    """Container for pre-computed KB filesystem results."""
-
-    __slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
-
-    def __init__(
-        self,
-        files: dict[str, Any] | None = None,
-        ls_ai_msg: AIMessage | None = None,
-        ls_tool_msg: ToolMessage | None = None,
-    ) -> None:
-        self.files = files
-        self.ls_ai_msg = ls_ai_msg
-        self.ls_tool_msg = ls_tool_msg
-
-    @property
-    def has_documents(self) -> bool:
-        return bool(self.files)
-
-
-async def precompute_kb_filesystem(
-    search_space_id: int,
-    query: str,
-    top_k: int = KB_TOP_K,
-) -> _KBResult:
-    """Search the KB and build the scoped filesystem outside the agent.
-
-    This is designed to be called via ``asyncio.gather`` alongside agent
-    graph compilation so the two run concurrently.
-    """
-    if not query:
-        return _KBResult()
-
-    try:
-        search_results = await search_knowledge_base(
-            query=query,
-            search_space_id=search_space_id,
-            top_k=top_k,
-        )
-
-        if not search_results:
-            return _KBResult()
-
-        new_files, _ = await _build_autocomplete_filesystem(
-            documents=search_results,
-            search_space_id=search_space_id,
-        )
-
-        if not new_files:
-            return _KBResult()
-
-        doc_paths = [
-            p
-            for p, v in new_files.items()
-            if p.startswith("/documents/") and v is not None
-        ]
-        tool_call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
-        ai_msg = AIMessage(
-            content="",
-            tool_calls=[
-                {"name": "ls", "args": {"path": "/documents"}, "id": tool_call_id}
-            ],
-        )
-        tool_msg = ToolMessage(
-            content=str(doc_paths) if doc_paths else "No documents found.",
-            tool_call_id=tool_call_id,
-        )
-        return _KBResult(files=new_files, ls_ai_msg=ai_msg, ls_tool_msg=tool_msg)
-
-    except Exception:
-        logger.warning(
-            "KB pre-computation failed, proceeding without KB", exc_info=True
-        )
-        return _KBResult()
-
-
-# ---------------------------------------------------------------------------
-# Filesystem middleware — no save_document, no persistence
-# ---------------------------------------------------------------------------
-
-
-class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
-    """Filesystem middleware for autocomplete — read-only exploration only.
-
-    Passes ``search_space_id=None`` so the new persistence pipeline is
-    bypassed; the autocomplete flow only reads, never commits to Postgres.
-    """
-
-    def __init__(self) -> None:
-        super().__init__(search_space_id=None, created_by_id=None)
-
-
-# ---------------------------------------------------------------------------
-# Agent factory
-# ---------------------------------------------------------------------------
-
-
-async def _compile_agent(
-    llm: BaseChatModel,
-    app_name: str,
-    window_title: str,
-) -> Any:
-    """Compile the agent graph (CPU-bound, runs in a thread)."""
-    system_prompt = _build_autocomplete_system_prompt(app_name, window_title)
-    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
-
-    middleware = [
-        AutocompleteFilesystemMiddleware(),
-        PatchToolCallsMiddleware(),
-        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
-    ]
-
-    agent = await asyncio.to_thread(
-        create_agent,
-        llm,
-        system_prompt=final_system_prompt,
-        tools=[],
-        middleware=middleware,
-    )
-    return agent.with_config({"recursion_limit": 200})
-
-
-async def create_autocomplete_agent(
-    llm: BaseChatModel,
-    *,
-    search_space_id: int,
-    kb_query: str,
-    app_name: str = "",
-    window_title: str = "",
-) -> tuple[Any, _KBResult]:
-    """Create the autocomplete agent and pre-compute KB in parallel.
-
-    Returns ``(agent, kb_result)`` so the caller can inject the pre-computed
-    filesystem into the agent's initial state without any middleware delay.
-    """
-    agent, kb = await asyncio.gather(
-        _compile_agent(llm, app_name, window_title),
-        precompute_kb_filesystem(search_space_id, kb_query),
-    )
-    return agent, kb
-
-
-# ---------------------------------------------------------------------------
-# JSON suggestion parsing (with fallback)
-# ---------------------------------------------------------------------------
-
-
-def _parse_suggestions(raw: str) -> list[str]:
-    """Extract a list of suggestion strings from the agent's output.
-
-    Tries, in order:
-      1. Direct ``json.loads``
-      2. Extract content between ```json ... ``` fences
-      3. Find the first ``[`` … ``]`` span
-    Falls back to wrapping the raw text as a single suggestion.
-    """
-    text = raw.strip()
-    if not text:
-        return []
-
-    for candidate in _json_candidates(text):
-        try:
-            parsed = json.loads(candidate)
-            if isinstance(parsed, list) and all(isinstance(s, str) for s in parsed):
-                return [s for s in parsed if s.strip()]
-        except (json.JSONDecodeError, ValueError):
-            continue
-
-    return [text]
-
-
-def _json_candidates(text: str) -> list[str]:
-    """Yield candidate JSON strings from raw text."""
-    candidates = [text]
-
-    fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
-    if fence:
-        candidates.append(fence.group(1).strip())
-
-    bracket = re.search(r"\[.*]", text, re.DOTALL)
-    if bracket:
-        candidates.append(bracket.group(0))
-
-    return candidates
-
-
-# ---------------------------------------------------------------------------
-# Streaming helper
-# ---------------------------------------------------------------------------
-
-
-async def stream_autocomplete_agent(
-    agent: Any,
-    input_data: dict[str, Any],
-    streaming_service: VercelStreamingService,
-    *,
-    emit_message_start: bool = True,
-) -> AsyncGenerator[str, None]:
-    """Stream agent events as Vercel SSE, with thinking steps for tool calls.
-
-    When ``emit_message_start`` is False the caller has already sent the
-    ``message_start`` event (e.g. to show preparation steps before the agent
-    runs).
-    """
-    thread_id = uuid.uuid4().hex
-    config = {"configurable": {"thread_id": thread_id}}
-
-    text_buffer: list[str] = []
-    active_tool_depth = 0
-    thinking_step_counter = 0
-    tool_step_ids: dict[str, str] = {}
-    step_titles: dict[str, str] = {}
-    completed_step_ids: set[str] = set()
-    last_active_step_id: str | None = None
-
-    def next_thinking_step_id() -> str:
-        nonlocal thinking_step_counter
-        thinking_step_counter += 1
-        return f"autocomplete-step-{thinking_step_counter}"
-
-    def complete_current_step() -> str | None:
-        nonlocal last_active_step_id
-        if last_active_step_id and last_active_step_id not in completed_step_ids:
-            completed_step_ids.add(last_active_step_id)
-            title = step_titles.get(last_active_step_id, "Done")
-            event = streaming_service.format_thinking_step(
-                step_id=last_active_step_id,
-                title=title,
-                status="complete",
-            )
-            last_active_step_id = None
-            return event
-        return None
-
-    if emit_message_start:
-        yield streaming_service.format_message_start()
-
-    gen_step_id = next_thinking_step_id()
-    last_active_step_id = gen_step_id
-    step_titles[gen_step_id] = "Generating suggestions"
-    yield streaming_service.format_thinking_step(
-        step_id=gen_step_id,
-        title="Generating suggestions",
-        status="in_progress",
-    )
-
-    try:
-        async for event in agent.astream_events(
-            input_data, config=config, version="v2"
-        ):
-            event_type = event.get("event", "")
-            if event_type == "on_chat_model_stream":
-                if active_tool_depth > 0:
-                    continue
-                if "surfsense:internal" in event.get("tags", []):
-                    continue
-                chunk = event.get("data", {}).get("chunk")
-                if chunk and hasattr(chunk, "content"):
-                    content = chunk.content
-                    if content and isinstance(content, str):
-                        text_buffer.append(content)
-
-            elif event_type == "on_chat_model_end":
-                if active_tool_depth > 0:
-                    continue
-                if "surfsense:internal" in event.get("tags", []):
-                    continue
-                output = event.get("data", {}).get("output")
-                if output and hasattr(output, "content"):
-                    if getattr(output, "tool_calls", None):
-                        continue
-                    content = output.content
-                    if content and isinstance(content, str) and not text_buffer:
-                        text_buffer.append(content)
-
-            elif event_type == "on_tool_start":
-                active_tool_depth += 1
-                tool_name = event.get("name", "unknown_tool")
-                run_id = event.get("run_id", "")
-                tool_input = event.get("data", {}).get("input", {})
-
-                step_event = complete_current_step()
-                if step_event:
-                    yield step_event
-
-                tool_step_id = next_thinking_step_id()
-                tool_step_ids[run_id] = tool_step_id
-                last_active_step_id = tool_step_id
-
-                title, items = _describe_tool_call(tool_name, tool_input)
-                step_titles[tool_step_id] = title
-                yield streaming_service.format_thinking_step(
-                    step_id=tool_step_id,
-                    title=title,
-                    status="in_progress",
-                    items=items,
-                )
-
-            elif event_type == "on_tool_end":
-                active_tool_depth = max(0, active_tool_depth - 1)
-                run_id = event.get("run_id", "")
-                step_id = tool_step_ids.pop(run_id, None)
-                if step_id and step_id not in completed_step_ids:
-                    completed_step_ids.add(step_id)
-                    title = step_titles.get(step_id, "Done")
-                    yield streaming_service.format_thinking_step(
-                        step_id=step_id,
-                        title=title,
-                        status="complete",
-                    )
-                    if last_active_step_id == step_id:
-                        last_active_step_id = None
-
-        step_event = complete_current_step()
-        if step_event:
-            yield step_event
-
-        raw_text = "".join(text_buffer)
-        suggestions = _parse_suggestions(raw_text)
-
-        yield streaming_service.format_data("suggestions", {"options": suggestions})
-
-        yield streaming_service.format_finish()
-        yield streaming_service.format_done()
-
-    except Exception as e:
-        logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
-        yield streaming_service.format_error("Autocomplete failed. Please try again.")
-        yield streaming_service.format_done()
-
-
-def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
-    """Return a human-readable (title, items) for a tool call thinking step."""
-    inp = tool_input if isinstance(tool_input, dict) else {}
-    if tool_name == "ls":
-        path = inp.get("path", "/")
-        return "Listing files", [path]
-    if tool_name == "read_file":
-        fp = inp.get("file_path", "")
-        display = fp if len(fp) <= 80 else "…" + fp[-77:]
-        return "Reading file", [display]
-    if tool_name == "write_file":
-        fp = inp.get("file_path", "")
-        display = fp if len(fp) <= 80 else "…" + fp[-77:]
-        return "Writing file", [display]
-    if tool_name == "edit_file":
-        fp = inp.get("file_path", "")
-        display = fp if len(fp) <= 80 else "…" + fp[-77:]
-        return "Editing file", [display]
-    if tool_name == "glob":
-        pat = inp.get("pattern", "")
-        base = inp.get("path", "/")
-        return "Searching files", [f"{pat} in {base}"]
-    if tool_name == "grep":
-        pat = inp.get("pattern", "")
-        path = inp.get("path", "")
-        display_pat = pat[:60] + ("…" if len(pat) > 60 else "")
-        return "Searching content", [
-            f'"{display_pat}"' + (f" in {path}" if path else "")
-        ]
-    return f"Using {tool_name}", []
--- a/surfsense_backend/app/agents/chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/__init__.py
@ -0,0 +1,5 @@
+"""Chat agents category.
+
+Groups the conversational agents that share a kernel: ``anonymous_chat`` and
+``multi_agent_chat``. Code shared by *both* lives in ``chat/shared/``.
+"""
--- a/surfsense_backend/app/agents/chat/anonymous_chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/anonymous_chat/__init__.py
@ -0,0 +1,14 @@
+"""Anonymous / free-chat agent.
+
+The no-login chat experience: a deliberately minimal agent that bypasses the
+full SurfSense deep-agent stack (filesystem, knowledge-base persistence,
+subagents, skills, memory) and answers with an optional ``web_search`` tool and
+an optional read-only uploaded document. See :mod:`.agent` for details.
+"""
+
+from app.agents.chat.anonymous_chat.agent import (
+    build_anonymous_system_prompt,
+    create_anonymous_chat_agent,
+)
+
+__all__ = ["build_anonymous_system_prompt", "create_anonymous_chat_agent"]
--- a/surfsense_backend/app/agents/chat/anonymous_chat/agent.py
+++ b/surfsense_backend/app/agents/chat/anonymous_chat/agent.py
@ -27,12 +27,12 @@ from langchain.agents.middleware import (
 from langchain_core.language_models import BaseChatModel
 from langgraph.types import Checkpointer

-from app.agents.new_chat.context import SurfSenseContextSchema
-from app.agents.new_chat.middleware import (
+from app.agents.chat.shared.context import SurfSenseContextSchema
+from app.agents.chat.shared.middleware import (
    RetryAfterMiddleware,
    create_surfsense_compaction_middleware,
 )
-from app.agents.new_chat.tools.web_search import create_web_search_tool
+from app.agents.chat.shared.tools.web_search import create_web_search_tool

 # Cap how much of an uploaded document we inline into the system prompt. The
 # upload endpoint allows files up to several MB, but the doc is re-sent on
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/constants.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/constants.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/prune_tool_names.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/prune_tool_names.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/compile_graph_sync.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/compile_graph_sync.py
@ -2,6 +2,7 @@

 from __future__ import annotations

+import time
 from collections.abc import Sequence
 from typing import Any

@ -11,13 +12,16 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer

-from app.agents.multi_agent_chat.middleware.stack import (
+from app.agents.chat.multi_agent_chat.main_agent.middleware.stack import (
    build_main_agent_deepagent_middleware,
 )
-from app.agents.new_chat.context import SurfSenseContextSchema
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.shared.context import SurfSenseContextSchema
 from app.db import ChatVisibility
+from app.utils.perf import get_perf_logger
+
+_perf_log = get_perf_logger()


 def build_compiled_agent_graph_sync(
@ -43,6 +47,7 @@ def build_compiled_agent_graph_sync(
    disabled_tools: list[str] | None = None,
 ):
    """Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``)."""
+    mw_start = time.perf_counter()
    main_agent_middleware = build_main_agent_deepagent_middleware(
        llm=llm,
        tools=tools,
@ -63,7 +68,9 @@ def build_compiled_agent_graph_sync(
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
    )
+    mw_elapsed = time.perf_counter() - mw_start

+    create_start = time.perf_counter()
    agent = create_agent(
        llm,
        system_prompt=final_system_prompt,
@ -72,6 +79,15 @@ def build_compiled_agent_graph_sync(
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )
+    create_elapsed = time.perf_counter() - create_start
+    _perf_log.info(
+        "[graph_compile] middleware_build=%.3fs main_create_agent=%.3fs "
+        "total=%.3fs mw_count=%d",
+        mw_elapsed,
+        create_elapsed,
+        mw_elapsed + create_elapsed,
+        len(main_agent_middleware),
+    )
    return agent.with_config(
        {
            "recursion_limit": 10_000,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/__init__.py
@ -0,0 +1,10 @@
+"""Action-log middleware: audit row per tool call (impl + builder)."""
+
+from .builder import build_action_log_mw
+from .middleware import ActionLogMiddleware, ToolDefinition
+
+__all__ = [
+    "ActionLogMiddleware",
+    "ToolDefinition",
+    "build_action_log_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/builder.py
@ -4,11 +4,10 @@ from __future__ import annotations

 import logging

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import ActionLogMiddleware
-from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import ActionLogMiddleware


 def build_action_log_mw(
@ -21,12 +20,13 @@ def build_action_log_mw(
    if not enabled(flags, "enable_action_log") or thread_id is None:
        return None
    try:
-        tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
+        # No built-in tool declares a ``reverse`` callable yet, so the action
+        # log runs without a tool_definitions map. Reversibility is opt-in per
+        # tool via ``ToolDefinition.reverse`` and can be wired here when used.
        return ActionLogMiddleware(
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
-            tool_definitions=tool_defs_by_name,
        )
    except Exception:  # pragma: no cover - defensive
        logging.warning(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/middleware.py
@ -1,25 +1,15 @@
 """Append-only action-log middleware for the SurfSense agent.

-Wraps every tool call via :meth:`AgentMiddleware.awrap_tool_call` and writes
-a row to :class:`~app.db.AgentActionLog` after the tool returns. Tools opt
-into reversibility by declaring a ``reverse`` callable on their
-:class:`~app.agents.new_chat.tools.registry.ToolDefinition`; the rendered
-descriptor is persisted in ``reverse_descriptor`` for use by
+Wraps every tool call and writes a row to :class:`~app.db.AgentActionLog`
+after the tool returns. Tools opt into reversibility via a ``reverse``
+callable on their :class:`ToolDefinition`; the rendered descriptor powers
 ``/api/threads/{thread_id}/revert/{action_id}``.

-Design points:
-
-* **Defensive.** Logging never blocks the agent. We catch every exception
-  on the DB write path and emit a warning; the tool's ``ToolMessage``
-  result is always returned untouched.
-* **Lightweight payload.** Only the tool ``name`` + ``args`` (capped) +
-  ``result_id`` + ``reverse_descriptor`` are stored. Tool output text
-  remains in the LangGraph checkpoint / spilled tool-output files.
-* **Best-effort reversibility.** We invoke ``reverse(args, result_obj)``
-  with the parsed JSON result when the tool's content is a JSON object;
-  otherwise the raw text is passed. Exceptions in the reverse callable
-  are swallowed and logged — a failed descriptor render simply means the
-  action is NOT marked reversible.
+Logging is fully defensive — DB-write failures are swallowed so the tool's
+result is always returned untouched. Only metadata (name, capped args,
+result_id, reverse_descriptor) is stored; tool output stays in the
+checkpoint. Reversibility is best-effort: a reverse callable that raises
+just leaves the action non-reversible.
 """

 from __future__ import annotations
@ -27,14 +17,14 @@ from __future__ import annotations
 import json
 import logging
 from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any

 from langchain.agents.middleware import AgentMiddleware
 from langchain_core.callbacks import adispatch_custom_event
 from langchain_core.messages import ToolMessage

-from app.agents.new_chat.feature_flags import get_flags
-from app.agents.new_chat.tools.registry import ToolDefinition
+from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags

 if TYPE_CHECKING:  # pragma: no cover - type-only
    from langchain.agents.middleware.types import ToolCallRequest
@ -44,6 +34,31 @@ if TYPE_CHECKING:  # pragma: no cover - type-only
 logger = logging.getLogger(__name__)


+@dataclass
+class ToolDefinition:
+    """Reversibility descriptor consumed by :class:`ActionLogMiddleware`.
+
+    Only ``name`` and ``reverse`` are read by the middleware; the remaining
+    fields let callers and tests describe a tool declaratively. A tool is
+    marked reversible in the action log when ``reverse`` is set and renders a
+    descriptor without raising.
+
+    Attributes:
+        name: Unique identifier for the tool.
+        description: Human-readable description of the tool; defaults to "".
+        factory: Optional callable that builds the tool (unused by the
+            middleware; retained for declarative call sites/tests).
+        reverse: Optional callable that, given the tool's ``(args, result)``,
+            returns the reverse descriptor (annotated as ``dict[str, Any]``)
+            describing the inverse invocation. ``None`` means not reversible.
+    """
+
+    name: str
+    description: str = ""
+    factory: Callable[[dict[str, Any]], Any] | None = None
+    reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
+
+
 # Cap for the persisted ``args`` JSON to avoid bloating the action log with
 # accidentally-huge inputs. Values are truncated and a flag is set in the
 # stored payload so consumers can detect truncation.
@ -93,18 +108,32 @@ class ActionLogMiddleware(AgentMiddleware):
        self._user_id = user_id
        self._tool_definitions = dict(tool_definitions or {})

-    def _enabled(self) -> bool:
+    def _enabled(self, thread_id: int | None) -> bool:
+        """Return whether action logging is active for ``thread_id``.
+
+        Always ``False`` when the ``disable_new_agent_stack`` flag is set;
+        otherwise requires ``enable_action_log`` to be truthy and
+        ``thread_id`` to be non-``None``.
+        """
         flags = get_flags()
         if flags.disable_new_agent_stack:
             return False
-        return bool(flags.enable_action_log) and self._thread_id is not None
+        return bool(flags.enable_action_log) and thread_id is not None
+
+    def _resolve_thread_id(self, request: ToolCallRequest) -> int | None:
+        """Pick the thread id to log against for this tool call.
+
+        The id found in the request's live ``RunnableConfig``
+        (``configurable.thread_id``) wins over the one captured in
+        ``__init__``: a compiled graph that is cached and reused across
+        threads would otherwise attribute every action-log row to the thread
+        that first built it. The constructor value is only the fallback, for
+        legacy/test runtimes that don't surface a config.
+        """
+        live_id = _resolve_thread_id(request)
+        if live_id is None:
+            return self._thread_id
+        return live_id

    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command[Any]]],
    ) -> ToolMessage | Command[Any]:
-        if not self._enabled():
+        thread_id = self._resolve_thread_id(request)
+        if not self._enabled(thread_id):
            return await handler(request)

        result: ToolMessage | Command[Any]
@ -119,10 +148,16 @@ class ActionLogMiddleware(AgentMiddleware):
                request=request,
                result=None,
                error_payload=error_payload,
+                thread_id=thread_id,
            )
            raise

-        await self._record(request=request, result=result, error_payload=None)
+        await self._record(
+            request=request,
+            result=result,
+            error_payload=None,
+            thread_id=thread_id,
+        )
        return result

    async def _record(
@ -131,6 +166,7 @@ class ActionLogMiddleware(AgentMiddleware):
        request: ToolCallRequest,
        result: ToolMessage | Command[Any] | None,
        error_payload: dict[str, Any] | None,
+        thread_id: int | None,
    ) -> None:
        """Persist one ``agent_action_log`` row. Defensive: never raises."""
        try:
@ -149,7 +185,7 @@ class ActionLogMiddleware(AgentMiddleware):
            chat_turn_id = _resolve_chat_turn_id(request)

            row = AgentActionLog(
-                thread_id=self._thread_id,
+                thread_id=thread_id,
                user_id=self._user_id,
                search_space_id=self._search_space_id,
                # ``turn_id`` is the deprecated alias of ``tool_call_id``
@ -178,11 +214,9 @@ class ActionLogMiddleware(AgentMiddleware):
            )
            return

-        # Surface a side-channel SSE event so the chat tool card can
-        # render a Revert button immediately after the row is durable.
-        # ``stream_new_chat`` translates this into a
-        # ``data-action-log`` SSE event. We DO NOT include the
-        # ``reverse_descriptor`` payload here; only a presence flag.
+        # Side-channel event (relayed by ``stream_new_chat`` as a
+        # ``data-action-log`` SSE) so the tool card can show a Revert button
+        # once the row is durable. Carries a presence flag, not the descriptor.
        try:
            await adispatch_custom_event(
                "action_log",
@ -337,6 +371,36 @@ def _resolve_chat_turn_id(request: Any) -> str | None:
    return None


+def _resolve_thread_id(request: Any) -> int | None:
+    """Extract ``configurable.thread_id`` from the request's runtime config.
+
+    Mirrors :func:`_resolve_chat_turn_id`: the lookup walks
+    ``request.runtime.config`` -> ``configurable`` -> ``thread_id`` and coerces
+    the value (a stringified ``chat_id`` at the main-graph level) to ``int``.
+    A missing link, a non-dict container, or an unparseable value all yield
+    ``None`` so the caller can fall back to the constructor value.
+    """
+    try:
+        # ``getattr`` with a default is safe on ``None``, so a missing runtime
+        # and a missing config collapse into the same ``None`` result.
+        cfg = getattr(getattr(request, "runtime", None), "config", None)
+        scoped = cfg.get("configurable") if isinstance(cfg, dict) else None
+        raw_id = scoped.get("thread_id") if isinstance(scoped, dict) else None
+        # ``int()`` failures (TypeError/ValueError) fall through to the handler.
+        return None if raw_id is None else int(raw_id)
+    except Exception:  # defensive: resolution must never break a tool call
+        return None
+
+
 def _resolve_message_id(request: Any) -> str | None:
    """Tool-call IDs serve as best-available message correlator at this layer."""
    return _resolve_tool_call_id(request)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/__init__.py
@ -0,0 +1,9 @@
+"""Anonymous-document middleware: Redis hydration, cloud only (impl + builder)."""
+
+from .builder import build_anonymous_doc_mw
+from .middleware import AnonymousDocumentMiddleware
+
+__all__ = [
+    "AnonymousDocumentMiddleware",
+    "build_anonymous_doc_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/builder.py
@ -2,8 +2,9 @@

 from __future__ import annotations

-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+
+from .middleware import AnonymousDocumentMiddleware


 def build_anonymous_doc_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
@ -24,8 +24,13 @@ from typing import Any
 from langchain.agents.middleware import AgentMiddleware, AgentState
 from langgraph.runtime import Runtime

-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
-from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT, safe_filename
+from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
+    SurfSenseFilesystemState,
+)
+from app.agents.chat.runtime.path_resolver import (
+    DOCUMENTS_ROOT,
+    safe_filename,
+)

 logger = logging.getLogger(__name__)

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/__init__.py
@ -0,0 +1,25 @@
+"""Per-turn cooperative busy-lock middleware + cancel primitives (main-agent)."""
+
+from .builder import build_busy_mutex_mw
+from .middleware import (
+    BusyMutexMiddleware,
+    end_turn,
+    get_cancel_event,
+    get_cancel_state,
+    is_cancel_requested,
+    manager,
+    request_cancel,
+    reset_cancel,
+)
+
+__all__ = [
+    "BusyMutexMiddleware",
+    "build_busy_mutex_mw",
+    "end_turn",
+    "get_cancel_event",
+    "get_cancel_state",
+    "is_cancel_requested",
+    "manager",
+    "request_cancel",
+    "reset_cancel",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/builder.py
@ -2,10 +2,12 @@

 from __future__ import annotations

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import BusyMutexMiddleware
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import (
+    BusyMutexMiddleware,
+)


 def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/middleware.py
@ -1,32 +1,12 @@
-"""
-BusyMutexMiddleware — per-thread asyncio lock + cancel token.
+"""Per-thread asyncio lock + cooperative cancel token, keyed by ``thread_id``.

-LangChain has no built-in concept of "this thread is already running a
-turn — refuse the second concurrent request". Without it, a user
-double-clicking "send" or refreshing the page mid-stream can spawn two
-turns racing on the same checkpoint, producing duplicated tool calls
-and mangled state.
+Refuses a second concurrent turn on the same thread (e.g. double-clicked
+"send") that would otherwise race on the same checkpoint and duplicate tool
+calls. Also exposes a per-thread cancel event that long-running tools poll
+via ``runtime.context.cancel_event.is_set()`` to abort cooperatively.

-Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a
-single-process, in-memory lock + cooperative cancellation token keyed by
-``thread_id``. For multi-worker deployments a distributed lock backend
-(Redis or PostgreSQL advisory locks) is a phase-2 follow-up.
-
-What this provides:
- A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
-  acquiring the lock during ``before_agent`` blocks any concurrent
-  prompt on the same thread until release.
- A per-thread ``asyncio.Event`` (``cancel_event``) that long-running
-  tools can poll to abort cooperatively. The event is reset between
-  turns. Tools should check ``runtime.context.cancel_event.is_set()``
-  in tight inner loops.
- A typed :class:`~app.agents.new_chat.errors.BusyError` raised when a
-  second turn arrives while the lock is held.
-
-Note: SurfSense's ``stream_new_chat`` is the call site that should
-acquire/release. Wiring this as middleware means the contract is
-explicit and the lock manager is shared with subagents that compile
-their own ``create_agent`` runnables.
+Process-local and in-memory; multi-worker deployments need a distributed lock
+(Redis / PostgreSQL advisory locks) as a follow-up.
 """

 from __future__ import annotations
@ -46,7 +26,7 @@ from langchain.agents.middleware.types import (
 from langgraph.config import get_config
 from langgraph.runtime import Runtime

-from app.agents.new_chat.errors import BusyError
+from app.agents.chat.runtime.errors import BusyError

 logger = logging.getLogger(__name__)

@ -152,9 +132,8 @@ class _ThreadLockManager:
        return True


-# Module-level singleton — process-local but reused across all agent
-# instances built in this process. Subagents created in nested
-# ``create_agent`` calls also get this so locks are coherent.
+# Process-local singleton shared across all agents/subagents built in this
+# process so per-thread locks stay coherent.
 manager = _ThreadLockManager()


@ -266,7 +245,6 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
        await lock.acquire()
        epoch = manager.bump_turn_epoch(thread_id)
        self._held_locks[thread_id] = (lock, epoch)
-        # Reset the cancel event so this turn starts fresh
        reset_cancel(thread_id)
        return None

@ -289,17 +267,14 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
            return None
        if lock.locked():
            lock.release()
-        # Always clear cancel event between turns so a stale signal
-        # doesn't leak into the next request.
+        # Clear cancel event so a stale signal doesn't leak into the next turn.
        reset_cancel(thread_id)
        return None

-    # Provide sync no-ops because the middleware base class allows them
    def before_agent(  # type: ignore[override]
        self, state: AgentState[Any], runtime: Runtime[ContextT]
    ) -> dict[str, Any] | None:
-        # Sync path: no asyncio.Lock to acquire. Best we can do is reject
-        # if anyone else is in flight.
+        # Sync path can't await an asyncio.Lock; only reject if one is in flight.
        thread_id = self._thread_id(runtime)
        if thread_id is None:
            if self._require_thread_id:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/config.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/config.py
@ -1,7 +1,9 @@
-"""RunnableConfig wiring for nested subagent invocations.
+"""HITL resume side-channel for nested subagent invocations.

-Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and
-exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
+Exposes the configurable side-channel ``stream_resume_chat`` uses to ferry
+resume payloads into a mid-flight subagent. The ``RunnableConfig`` builder and
+state-key filter shared with subagents live in
+``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
 """

 from __future__ import annotations
@ -11,8 +13,6 @@ from typing import Any

 from langchain.tools import ToolRuntime

-from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
-
 logger = logging.getLogger(__name__)

 # langgraph stores the parent task's scratchpad under this configurable key;
@ -20,39 +20,6 @@ logger = logging.getLogger(__name__)
 _LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"


-def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
-    """RunnableConfig for the nested invoke; raises ``recursion_limit`` and isolates ``thread_id``.
-
-    Each parallel subagent invocation lands in its own checkpoint slot keyed
-    by an extended ``thread_id`` of the form ``{parent_thread}::task:{tool_call_id}``.
-    The same call across the resume cycle keeps reading from the same snapshot
-    (``tool_call_id`` is stable per LLM-emitted call).
-
-    We namespace via ``thread_id`` rather than ``checkpoint_ns`` because
-    langgraph's ``aget_state`` interprets a non-empty ``checkpoint_ns`` as a
-    subgraph path and raises ``ValueError("Subgraph X not found")``.
-    """
-    merged: dict[str, Any] = dict(runtime.config) if runtime.config else {}
-    current_limit = merged.get("recursion_limit")
-    try:
-        current_int = int(current_limit) if current_limit is not None else 0
-    except (TypeError, ValueError):
-        current_int = 0
-    if current_int < DEFAULT_SUBAGENT_RECURSION_LIMIT:
-        merged["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT
-
-    configurable: dict[str, Any] = dict(merged.get("configurable") or {})
-    parent_thread_id = configurable.get("thread_id")
-    per_call_suffix = f"task:{runtime.tool_call_id}"
-    configurable["thread_id"] = (
-        f"{parent_thread_id}::{per_call_suffix}"
-        if parent_thread_id
-        else per_call_suffix
-    )
-    merged["configurable"] = configurable
-    return merged
-
-
 def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
    """Pop the resume payload for *this* call's ``tool_call_id``.

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/constants.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/constants.py
@ -1,24 +1,14 @@
-"""Constants shared by the checkpointed subagent middleware."""
+"""Tuning constants for the checkpointed subagent middleware.
+
+``EXCLUDED_STATE_KEYS`` and ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` are part of the
+subagent-invocation contract shared with subagents and now live in
+``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
+"""

 from __future__ import annotations

 import os

-# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
-EXCLUDED_STATE_KEYS = frozenset(
-    {
-        "messages",
-        "todos",
-        "structured_response",
-        "skills_metadata",
-        "memory_contents",
-    }
-)
-
-# Match the parent graph's budget; the LangGraph default of 25 trips on
-# multi-step subagent runs.
-DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
-

 def _read_timeout_env(name: str, default: float) -> float:
    """Parse ``name`` from the environment; fall back to ``default`` on bad values.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/middleware.py
@ -0,0 +1,188 @@
+"""SubAgent middleware that compiles each subagent against the parent checkpointer."""
+
+from __future__ import annotations
+
+import time
+from collections.abc import Callable
+from typing import Any, cast
+
+from deepagents.backends.protocol import BackendFactory, BackendProtocol
+from deepagents.middleware.subagents import (
+    TASK_SYSTEM_PROMPT,
+    CompiledSubAgent,
+    SubAgent,
+    SubAgentMiddleware,
+)
+from langchain.agents import create_agent
+from langchain.chat_models import init_chat_model
+from langchain_core.runnables import Runnable
+from langgraph.types import Checkpointer
+
+from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
+    SURF_CONTEXT_HINT_PROVIDER_KEY,
+    SURF_LAZY_SPEC_FACTORY_KEY,
+)
+from app.utils.perf import get_perf_logger
+
+from .task_tool import build_task_tool_with_parent_config
+
+_perf_log = get_perf_logger()
+
+
+class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
+    """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.
+
+    Construction only indexes the subagent specs and builds the ``task`` tool;
+    each subagent graph is compiled (and memoized) the first time the tool
+    resolves it by name via :meth:`_resolve_subagent`.
+    """
+
+    def __init__(
+        self,
+        *,
+        checkpointer: Checkpointer,
+        backend: BackendProtocol | BackendFactory,
+        subagents: list[SubAgent | CompiledSubAgent],
+        system_prompt: str | None = TASK_SYSTEM_PROMPT,
+        task_description: str | None = None,
+        search_space_id: int | None = None,
+    ) -> None:
+        """Index ``subagents`` and build the ``task`` tool without compiling any graph.
+
+        Args:
+            checkpointer: Passed as ``checkpointer`` to every ``create_agent``
+                call made by :meth:`_compile_one`.
+            backend: Stored on the instance as ``_backend``.
+            subagents: Subagent specs to register; must be non-empty.
+            system_prompt: Base prompt. When truthy (and at least one
+                descriptor exists) the subagent catalog is appended to it.
+            task_description: Forwarded to the ``task`` tool builder.
+            search_space_id: Stored on the instance and forwarded to the
+                ``task`` tool builder.
+
+        Raises:
+            ValueError: If ``subagents`` is empty, or a spec that is neither
+                factory-backed nor pre-compiled lacks ``model`` or ``tools``.
+        """
+        self._surf_checkpointer = checkpointer
+        # Two-argument ``super`` starts the MRO lookup *after*
+        # ``SubAgentMiddleware``, so the upstream ``__init__`` is not run.
+        # NOTE(review): presumably to avoid its eager subagent compilation;
+        # confirm against the deepagents version in use.
+        super(SubAgentMiddleware, self).__init__()
+        if not subagents:
+            raise ValueError(
+                "At least one subagent must be specified when using the new API"
+            )
+        self._backend = backend
+        self._subagents = subagents
+        # Search-space id is captured at build time (the orchestrator runs in
+        # exactly one search space for its lifetime). The spawn-paused kill
+        # switch keys on it so an operator can quarantine one workspace
+        # without affecting the rest of the deployment.
+        self._search_space_id = search_space_id
+
+        # Lazy subagent compilation. Compiling a subagent graph via
+        # ``create_agent`` is expensive (~250-400ms each) and there can be up
+        # to ~17 of them. Doing it all in ``__init__`` put the full cost on
+        # every cold ``agent_cache`` miss (i.e. on time-to-first-token), even
+        # though a turn usually invokes zero or one subagent. We instead index
+        # the raw specs here and compile each graph on first ``task(name)``
+        # use, memoizing the result for the life of this (cached) instance.
+        self._compiled: dict[str, Runnable] = {}
+        self._lazy_specs: dict[str, dict[str, Any]] = {}
+        # Subagents whose *spec itself* is built lazily (not just compiled).
+        # Keyed by name → zero-arg factory returning the full spec dict. Used
+        # for the write knowledge_base subagent, whose filesystem middleware
+        # builds ~13 tool schemas (~2s) that almost never matter on turn 1.
+        self._lazy_spec_factories: dict[str, Callable[[], dict[str, Any]]] = {}
+        descriptors = self._build_subagent_registry()
+
+        task_tool = build_task_tool_with_parent_config(
+            descriptors,
+            task_description,
+            search_space_id=search_space_id,
+            resolve_subagent=self._resolve_subagent,
+        )
+        if system_prompt and descriptors:
+            agents_desc = "\n".join(
+                f"- {s['name']}: {s['description']}" for s in descriptors
+            )
+            self.system_prompt = (
+                system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
+            )
+        else:
+            self.system_prompt = system_prompt
+        self.tools = [task_tool]
+
+    @staticmethod
+    def _require_model_and_tools(name: str, spec: dict[str, Any]) -> None:
+        """Raise ``ValueError`` unless ``spec`` carries both ``model`` and ``tools``.
+
+        Shared by the eager registration path and the deferred-factory path so
+        both report a malformed spec with the same message.
+        """
+        for key in ("model", "tools"):
+            if key not in spec:
+                msg = f"SubAgent '{name}' must specify '{key}'"
+                raise ValueError(msg)
+
+    def _build_subagent_registry(self) -> list[dict[str, Any]]:
+        """Index subagents for lazy compilation; return lightweight descriptors.
+
+        Pre-compiled specs (those carrying a ``runnable``) are seeded directly
+        into the memo. Lazy specs are stashed by name and compiled on first
+        ``task(...)`` use via :meth:`_resolve_subagent`. The returned
+        descriptors carry only ``name``/``description`` plus the optional
+        context-hint provider — everything the ``task`` tool needs to validate
+        names, render its catalog, and run hints, without paying the
+        ``create_agent`` cost up front.
+
+        Raises:
+            ValueError: If a spec that is neither factory-backed nor
+                pre-compiled lacks ``model`` or ``tools``.
+        """
+        descriptors: list[dict[str, Any]] = []
+        for spec in self._subagents:
+            # Provider may be ``None`` (no hint), in which case task_tool skips
+            # the prepend step. We forward the key unconditionally so the
+            # descriptor shape is uniform.
+            hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
+            name = spec["name"]
+            spec_factory = cast(dict, spec).get(SURF_LAZY_SPEC_FACTORY_KEY)
+            if spec_factory is not None:
+                # Descriptor-only entry: the spec dict is built on first use.
+                self._lazy_spec_factories[name] = spec_factory
+            elif "runnable" in spec:
+                compiled = cast(CompiledSubAgent, spec)
+                self._compiled[name] = compiled["runnable"]
+            else:
+                # Fail at build time, not on first ``task(name)`` use.
+                self._require_model_and_tools(name, cast(dict, spec))
+                self._lazy_specs[name] = cast(dict, spec)
+            descriptors.append(
+                {
+                    "name": name,
+                    "description": spec["description"],
+                    SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
+                }
+            )
+        return descriptors
+
+    def _resolve_subagent(self, name: str) -> Runnable:
+        """Return the compiled subagent graph for ``name``, compiling on first use.
+
+        Memoized: the ``create_agent`` cost is paid once per subagent per
+        cached middleware instance.
+
+        Raises:
+            KeyError: ``name`` is not a registered subagent (callers in the
+                ``task`` tool validate membership before resolving).
+            ValueError: A factory-built spec lacks ``model`` or ``tools``.
+        """
+        cached = self._compiled.get(name)
+        if cached is not None:
+            return cached
+        spec = self._lazy_specs.get(name)
+        if spec is None:
+            factory = self._lazy_spec_factories.get(name)
+            if factory is None:
+                raise KeyError(name)
+            # Build the spec on first use (pays the deferred construction cost
+            # here, off the cold agent-build path), then compile and memoize.
+            build_start = time.perf_counter()
+            spec = factory()
+            _perf_log.info(
+                "[subagent_spec_lazy] name=%s (deferred spec build) in %.3fs",
+                name,
+                time.perf_counter() - build_start,
+            )
+            # Directly registered specs were validated at registration time; a
+            # factory-built spec only exists now, so apply the same check here.
+            # Otherwise a missing key would surface from ``_compile_one`` as a
+            # bare ``KeyError`` that looks like an unknown subagent name.
+            self._require_model_and_tools(name, spec)
+        runnable = self._compile_one(spec)
+        self._compiled[name] = runnable
+        return runnable
+
+    def _compile_one(self, spec: dict[str, Any]) -> Runnable:
+        """Compile a single subagent graph against the parent checkpointer.
+
+        Args:
+            spec: Subagent spec. ``model``, ``tools``, ``system_prompt`` and
+                ``name`` are read by key; ``middleware`` is optional. A string
+                ``model`` is resolved through ``init_chat_model``.
+
+        Returns:
+            The runnable returned by ``create_agent``.
+
+        Raises:
+            KeyError: If ``spec`` lacks one of the keys read by subscript.
+        """
+        model = spec["model"]
+        if isinstance(model, str):
+            model = init_chat_model(model)
+
+        # ``or []`` (rather than a ``.get`` default) also covers an explicit
+        # ``"middleware": None`` entry, matching how ``tools`` is counted below.
+        middleware: list[Any] = list(spec.get("middleware") or [])
+        tools_count = len(spec.get("tools") or [])
+        mw_count = len(middleware)
+
+        compile_start = time.perf_counter()
+        runnable = create_agent(
+            model,
+            system_prompt=spec["system_prompt"],
+            tools=spec["tools"],
+            middleware=middleware,
+            name=spec["name"],
+            checkpointer=self._surf_checkpointer,
+        )
+        _perf_log.info(
+            "[subagent_compile_lazy] name=%s in %.3fs tools=%d mw=%d",
+            spec["name"],
+            time.perf_counter() - compile_start,
+            tools_count,
+            mw_count,
+        )
+        return runnable
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/propagation.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/propagation.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume_routing.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume_routing.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/spawn_paused.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/spawn_paused.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_description.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_description.py
@ -6,7 +6,7 @@ and the ``<tools>`` block render from the same source.

 from __future__ import annotations

-from app.agents.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
+from app.agents.chat.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
    read_prompt_md,
 )

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
@ -12,7 +12,7 @@ import asyncio
 import json
 import logging
 import time
-from collections.abc import Awaitable
+from collections.abc import Awaitable, Callable
 from typing import Annotated, Any, NoReturn, TypeVar

 from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
@ -23,7 +23,11 @@ from langchain_core.tools import StructuredTool
 from langgraph.errors import GraphInterrupt
 from langgraph.types import Command, Interrupt

-from app.agents.multi_agent_chat.subagents.shared.spec import (
+from app.agents.chat.multi_agent_chat.subagents.shared.invocation import (
+    EXCLUDED_STATE_KEYS,
+    subagent_invoke_config,
+)
+from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
    SURF_CONTEXT_HINT_PROVIDER_KEY,
    ContextHintProvider,
 )
@ -34,13 +38,11 @@ from .config import (
    consume_surfsense_resume,
    drain_parent_null_resume,
    has_surfsense_resume,
-    subagent_invoke_config,
 )
 from .constants import (
    DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
    DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
    DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
-    EXCLUDED_STATE_KEYS,
    MAX_SUBAGENT_BATCH_SIZE,
 )
 from .propagation import wrap_with_tool_call_id
@ -80,13 +82,10 @@ _T = TypeVar("_T")
 async def _ainvoke_with_timeout[T](
    coro: Awaitable[_T], *, subagent_type: str, started_at: float
 ) -> _T:
-    """Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``.
+    """Apply the subagent invoke timeout to ``coro`` (non-positive disables it).

-    A non-positive timeout disables the cap (configurable via the
-    ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the
-    underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
-    raised — the caller wraps it into a synthetic ToolMessage so the
-    orchestrator can decide what to do.
+    On expiry the task is cancelled and :class:`SubagentInvokeTimeoutError` is
+    raised for the caller to turn into a synthetic ToolMessage.
    """
    timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
    if timeout <= 0:
@ -144,17 +143,31 @@ def build_task_tool_with_parent_config(
    task_description: str | None = None,
    *,
    search_space_id: int | None = None,
+    resolve_subagent: Callable[[str], Runnable] | None = None,
 ) -> BaseTool:
-    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
-    subagent_graphs: dict[str, Runnable] = {
-        spec["name"]: spec["runnable"] for spec in subagents
-    }
-    # Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``).
-    # The mapping is sparse: only routes that opted in via ``pack_subagent``
-    # appear here, and the value is invoked once per ``task(...)`` call to
-    # generate a short string prepended to the subagent's first
-    # ``HumanMessage``. Failures are logged and swallowed — a broken hint
-    # provider must never prevent the underlying task from running.
+    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.
+
+    ``subagents`` are lightweight descriptors (``name``/``description`` + the
+    optional context-hint provider); the actual compiled graph is fetched
+    lazily via ``resolve_subagent(name)`` so subagent ``create_agent`` cost is
+    paid on first ``task(name)`` use rather than at graph-build time.
+
+    For backward compatibility (and tests), ``resolve_subagent`` may be omitted
+    when every descriptor already carries a pre-compiled ``runnable``; in that
+    case a trivial dict-backed resolver is used.
+    """
+    subagent_names: set[str] = {spec["name"] for spec in subagents}
+    if resolve_subagent is None:
+        _eager_graphs: dict[str, Runnable] = {
+            spec["name"]: spec["runnable"] for spec in subagents if "runnable" in spec
+        }
+
+        def resolve_subagent(name: str) -> Runnable:
+            return _eager_graphs[name]
+
+    # Sparse map of opt-in context-hint providers; each runs once per task()
+    # call to prepend a string to the subagent's first HumanMessage. Failures
+    # are swallowed so a broken hint never blocks the task.
    subagent_hint_providers: dict[str, ContextHintProvider] = {
        spec["name"]: provider
        for spec in subagents
@ -176,24 +189,18 @@ def build_task_tool_with_parent_config(
    def _billable_call_update(
        subagent_type: str, runtime: ToolRuntime
    ) -> dict[str, Any]:
-        """Build the per-call ``billable_calls`` delta + an optional warning.
+        """Build the per-call ``billable_calls`` delta plus an optional soft-cap warning.

-        The orchestrator's ``billable_calls`` map is summed by
-        :func:`_int_counter_merge_reducer`, so we always emit
-        ``{subagent_type: 1}`` and let the reducer accumulate. If the
-        cumulative count *after* this call would cross the configured
-        threshold, we also slip a soft ``messages`` entry into the update
-        so the orchestrator can read it on its next step and self-limit.
-        Returning a plain ``dict`` (vs. an extra :class:`Command`) keeps
-        the helper composable with the existing single/batch return paths.
+        Always emits ``{subagent_type: 1}`` (a reducer accumulates it); when this
+        call would cross the threshold, also adds a soft ``messages`` entry so the
+        orchestrator self-limits on its next step.
        """
        delta: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
        threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
        if threshold <= 0:
            return delta
        prior = runtime.state.get("billable_calls") or {}
-        # ``prior`` may be a plain dict or a reducer-managed mapping; only
-        # int values are counted so a malformed checkpoint can't crash us.
+        # Count int values only so a malformed checkpoint can't crash us.
        prior_total = sum(v for v in prior.values() if isinstance(v, int))
        new_total = prior_total + 1
        if prior_total < threshold <= new_total:
@ -212,8 +219,7 @@ def build_task_tool_with_parent_config(
        """Merge the per-call billable counter (and warning) into ``cmd``."""
        delta = _billable_call_update(subagent_type, runtime)
        warn_text = delta.pop("_billable_warn_text", None)
-        # ``cmd.update`` may be a dict or LangGraph ``UpdateDict``; defensively
-        # copy so we don't mutate state shared across other tool returns.
+        # Copy so we don't mutate state shared with other tool returns.
        update = dict(getattr(cmd, "update", {}) or {})
        for key, value in delta.items():
            update[key] = value
@ -226,14 +232,10 @@ def build_task_tool_with_parent_config(
        return Command(update=update)

    def _safe_message_text(msg: Any) -> str:
-        """Pull text out of a BaseMessage without trusting the ``.text`` property.
+        """Pull text out of a BaseMessage without using the ``.text`` property.

-        ``BaseMessage.text`` walks ``content_blocks`` and crashes with
-        ``TypeError: 'NoneType' object is not iterable`` when ``content`` is
-        ``None`` (common for tool-call AIMessages whose payload is purely
-        structured). ``getattr(msg, "text", None)`` does not catch this
-        because Python evaluates the property body before falling back to
-        the default. Read ``content`` directly and coerce defensively.
+        ``.text`` crashes when ``content`` is ``None`` (common for tool-call
+        AIMessages), and ``getattr`` won't catch it, so read ``content`` directly.
        """
        try:
            content = getattr(msg, "content", None)
@ -256,23 +258,18 @@ def build_task_tool_with_parent_config(
        return str(content)

    def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
-        """Compress the subagent's message stream into a compact tool trace.
+        """Compress the subagent's messages into a compact tool trace.

-        Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview":
-        <≤120 chars>}`` so the orchestrator can show "this is what your
-        specialist actually did" without dumping the full message stream
-        back through the prompt. The list is attached to the returned
-        ToolMessage's ``additional_kwargs`` (under ``"surf_tool_trace"``);
-        the LLM never sees it, but UI / observability code can pluck it
-        out of the checkpoint.
+        Entries (``{tool, status, preview}``) ride on the ToolMessage's
+        ``additional_kwargs["surf_tool_trace"]`` for UI/observability; the LLM
+        never sees them.
        """
        trace: list[dict[str, Any]] = []
        for msg in messages:
            tool_name = getattr(msg, "name", None)
            tool_call_id_attr = getattr(msg, "tool_call_id", None)
            if not tool_name and not tool_call_id_attr:
-                # Only ToolMessages have either field; skip AIMessage /
-                # HumanMessage / SystemMessage frames.
+                # Only ToolMessages carry either field.
                continue
            status = getattr(msg, "status", None) or "ok"
            preview = _safe_message_text(msg).strip().replace("\n", " ")
@ -306,8 +303,7 @@ def build_task_tool_with_parent_config(
            )
            raise ValueError(msg)
        message_text = _safe_message_text(messages[-1]).rstrip()
-        # Tool-trace is purely observability — wrap defensively so a single
-        # malformed frame never bubbles up and kills the whole user turn.
+        # Trace is observability-only; never let a bad frame kill the turn.
        try:
            tool_trace = _build_tool_trace(messages)
        except Exception:
@ -318,10 +314,7 @@ def build_task_tool_with_parent_config(
            tool_trace = []
        tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
        if tool_trace:
-            # ``additional_kwargs`` is a free-form dict on BaseMessage; using
-            # a ``surf_`` prefix avoids collision with provider-specific keys
-            # (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
-            # consumers (UI, observability) read it off the checkpoint.
+            # surf_ prefix avoids collision with provider keys (e.g. cache_control).
            tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
        return Command(
            update={
@ -353,15 +346,13 @@ def build_task_tool_with_parent_config(
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
-        subagent = subagent_graphs[subagent_type]
+        subagent = resolve_subagent(subagent_type)
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
        hint = _resolve_context_hint(subagent_type, description, runtime)
        if hint:
-            # Prepend as a tagged block so the subagent prompt can pattern-match
-            # on the section (and a future change can lift it into its own
-            # ``SystemMessage`` if needed).
+            # Tagged block so the subagent prompt can pattern-match the section.
            payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
        else:
            payload = description
@ -372,16 +363,12 @@ def build_task_tool_with_parent_config(
        results: list[tuple[int, str, dict | str, dict | None]],
        runtime: ToolRuntime,
    ) -> Command:
-        """Combine per-child results into one Command with a combined ToolMessage.
+        """Combine per-child results into one Command with an aggregate ToolMessage.

-        ``results`` is a list of ``(task_index, subagent_type,
-        payload_or_error_text, child_state_update)`` tuples — preserving the
-        input order so the orchestrator can map each block back to the task
-        it dispatched. State updates are merged by reducer for keys outside
-        :data:`EXCLUDED_STATE_KEYS`; everything else (``messages``, ``todos``,
-        etc.) is replaced by the synthesized aggregate ToolMessage. Every
-        child also contributes a ``billable_calls`` increment so cost
-        accounting matches single-mode dispatch.
+        ``results`` tuples are ``(task_index, subagent_type, payload_or_error,
+        child_state_update)``; output blocks are sorted by index so the LLM can
+        map them back to dispatch order, and each child contributes a
+        ``billable_calls`` increment to match single-mode accounting.
        """
        results.sort(key=lambda r: r[0])
        merged_state: dict[str, Any] = {}
@ -422,8 +409,8 @@ def build_task_tool_with_parent_config(
                }
            )
            if state_update:
-                # Naive merge: later tasks win on scalar collisions; reducer-backed
-                # fields (``receipts``, ``files`` etc.) accumulate at apply time.
+                # Later tasks win on scalar collisions; reducer-backed fields
+                # accumulate at apply time.
                merged_state.update(state_update)
        aggregate = "\n\n".join(message_blocks)
        aggregate_msg = ToolMessage(
@ -467,15 +454,13 @@ def build_task_tool_with_parent_config(
    ) -> tuple[int, str, dict | str, dict | None]:
        """Run one child of a batched ``task`` call under the concurrency cap.

-        Errors are returned as plain text in slot 2 so a single child's
-        failure does not abort the whole batch. ``GraphInterrupt`` from a
-        batched child is currently treated as a hard failure for that child
-        only — batched HITL is intentionally out of scope for the v1
-        rollout (see plan tier 2 item 4 risks).
+        Errors are returned as text (slot 2) so one child's failure doesn't abort
+        the batch. A child's ``GraphInterrupt`` is a hard failure for that child:
+        batched HITL is intentionally out of scope.
        """
        async with semaphore:
-            if subagent_type not in subagent_graphs:
-                allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+            if subagent_type not in subagent_names:
+                allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
                return (
                    task_index,
                    subagent_type,
@ -505,8 +490,7 @@ def build_task_tool_with_parent_config(
                )
                return (task_index, subagent_type, str(exc), None)
            except GraphInterrupt:
-                # Batched HITL is unsupported in v1 — surface as a failure
-                # for this child so the rest of the batch still completes.
+                # Batched HITL unsupported; fail this child so the batch finishes.
                logger.warning(
                    "Batch child %d (%s) raised GraphInterrupt; batched HITL "
                    "is not supported. Re-dispatch this task as a single "
@ -543,14 +527,11 @@ def build_task_tool_with_parent_config(
            return (task_index, subagent_type, result, child_state_update)

    def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
-        """Rescue common LLM-side malformations of the ``tasks`` argument.
+        """Rescue common LLM malformations of the ``tasks`` argument.

-        Some providers serialise an array argument as a JSON-encoded string,
-        and small models occasionally hand back a single ``{description,
-        subagent_type}`` dict instead of a one-element array. Both are
-        recovered here with a WARN log so the issue is visible in metrics
-        but the user's turn still completes; truly broken shapes return a
-        plain string that the caller surfaces as the tool error.
+        Recovers a JSON-encoded array string and a single dict (instead of a
+        1-element array), logging a WARN. Unrecoverable shapes return a string
+        the caller surfaces as the tool error.
        """
        if isinstance(tasks, list):
            return tasks
@ -585,13 +566,10 @@ def build_task_tool_with_parent_config(
    async def _adispatch_batch(
        tasks: list[dict], runtime: ToolRuntime
    ) -> Command | str:
-        """Fan-out helper for the ``tasks`` array shape.
+        """Fan out the ``tasks`` array (size- and concurrency-capped).

-        Bounded by :data:`MAX_SUBAGENT_BATCH_SIZE` and concurrency-capped
-        at :data:`DEFAULT_SUBAGENT_BATCH_CONCURRENCY`. Returns a single
-        :class:`Command` that the LLM sees as one ToolMessage per child,
-        prefixed with ``[task <index>]`` so it can map back to the input
-        order.
+        Returns one Command; the LLM sees one ``[task <index>]``-prefixed block
+        per child, in input order.
        """
        if not tasks:
            return "tasks: array is empty; nothing to dispatch."
@ -657,8 +635,8 @@ def build_task_tool_with_parent_config(
                "task: must provide either single-mode (`description`+`subagent_type`) "
                "or batch-mode (`tasks`)."
            )
-        if subagent_type not in subagent_graphs:
-            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+        if subagent_type not in subagent_names:
+            allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
@ -701,17 +679,16 @@ def build_task_tool_with_parent_config(
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
-                # Bridge invariant: a queued resume must accompany any pending
-                # subagent interrupt. Fall-through replay would silently re-prompt
-                # the user; raise so the streaming layer surfaces a clear error.
+                # A pending interrupt must have a queued resume; otherwise replay
+                # would silently re-prompt the user. Raise instead.
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
-            # Prevent the parent's resume payload from leaking into subagent
-            # interrupts via langgraph's parent_scratchpad fallback.
+            # Stop the parent's resume leaking into subagent interrupts via
+            # langgraph's parent_scratchpad fallback.
            drain_parent_null_resume(runtime)
            with ot.subagent_invoke_span(
                subagent_type=subagent_type, path=invoke_path
@ -827,10 +804,8 @@ def build_task_tool_with_parent_config(
        ] = None,
    ) -> str | Command:
        atask_start = time.perf_counter()
-        # Kill switch: when ops flips the spawn-paused flag for this
-        # workspace, every ``task(...)`` invocation (single- or batch-mode)
-        # short-circuits with a clear ToolMessage so the orchestrator can
-        # tell the user what happened and stop hammering downstream APIs.
+        # Ops kill switch: short-circuit every task() call for this workspace
+        # so the orchestrator stops hammering downstream APIs.
        if await is_spawn_paused(search_space_id):
            logger.warning(
                "[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
@ -869,8 +844,8 @@ def build_task_tool_with_parent_config(
            subagent_type,
            runtime.tool_call_id,
        )
-        if subagent_type not in subagent_graphs:
-            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+        if subagent_type not in subagent_names:
+            allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
@ -921,8 +896,8 @@ def build_task_tool_with_parent_config(
                    )
                expected = hitlrequest_action_count(pending_value)
                resume_value = fan_out_decisions_to_match(resume_value, expected)
-                # Prevent the parent's resume payload from leaking into subagent
-                # interrupts via langgraph's parent_scratchpad fallback.
+                # Stop the parent's resume leaking into subagent interrupts via
+                # langgraph's parent_scratchpad fallback.
                drain_parent_null_resume(runtime)
                with ot.subagent_invoke_span(
                    subagent_type=subagent_type, path=invoke_path
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/__init__.py
@ -0,0 +1,15 @@
+"""Context-editing middleware: spill + clear-tool-uses passes (impl + builder)."""
+
+from .builder import build_context_editing_mw
+from .middleware import (
+    ClearToolUsesEdit,
+    SpillingContextEditingMiddleware,
+    SpillToBackendEdit,
+)
+
+__all__ = [
+    "ClearToolUsesEdit",
+    "SpillToBackendEdit",
+    "SpillingContextEditingMiddleware",
+    "build_context_editing_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/builder.py
@ -7,18 +7,18 @@ from typing import Any

 from langchain_core.tools import BaseTool

-from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
+from app.agents.chat.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
    safe_exclude_tools,
 )
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
+
+from .middleware import (
    ClearToolUsesEdit,
    SpillingContextEditingMiddleware,
    SpillToBackendEdit,
 )

-from ..shared.flags import enabled
-

 def build_context_editing_mw(
    *,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/dedup_hitl.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/dedup_hitl.py
@ -1,4 +1,4 @@
-"""Middleware that deduplicates HITL tool calls within a single LLM response.
+"""Drop duplicate HITL tool calls before execution.

 When the LLM emits multiple calls to the same HITL tool with the same
 primary argument (e.g. two ``delete_calendar_event("Doctor Appointment")``),
@ -9,72 +9,33 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
 That means it is also safe across LangGraph ``interrupt()`` boundaries:
 the removed call will never appear on graph resume.

-Dedup-key resolution order:
+Dedup-key resolution order (read from each tool's own ``metadata``):

-1. :class:`ToolDefinition.dedup_key` — callable provided by the registry
-   entry. This is the canonical mechanism.
-2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name;
-   used by MCP / Composio tools whose schemas the registry doesn't see.
+1. ``tool.metadata["dedup_key"]`` — callable mapping the args dict to a
+   stable signature string. This is the canonical mechanism.
+2. ``tool.metadata["hitl_dedup_key"]`` — string naming a primary arg;
+   used by MCP / Composio tools that only expose a single key field.

 A tool with no resolver from either path simply opts out of dedup.
 """

 from __future__ import annotations

-import json
 import logging
-from collections.abc import Callable
+from collections.abc import Sequence
 from typing import Any

 from langchain.agents.middleware import AgentMiddleware, AgentState
+from langchain_core.tools import BaseTool
 from langgraph.runtime import Runtime

+from app.agents.chat.multi_agent_chat.shared.middleware.dedup_tool_calls import (
+    DedupResolver,
+    wrap_dedup_key_by_arg_name,
+)
+
 logger = logging.getLogger(__name__)

-# Resolver type — given the tool ``args`` dict returns a stable
-# string used to dedupe consecutive calls. ``None`` means no dedup.
-DedupResolver = Callable[[dict[str, Any]], str]
-
-
-def wrap_dedup_key_by_arg_name(arg_name: str) -> DedupResolver:
-    """Adapt a string-arg name into a :data:`DedupResolver`.
-
-    Convenience helper used by registry entries that just want to dedupe
-    on a single arg's lowercased value (the most common case for native
-    HITL tools like ``send_gmail_email`` keyed on ``subject``).
-
-    Example::
-
-        ToolDefinition(
-            name="send_gmail_email",
-            ...,
-            dedup_key=wrap_dedup_key_by_arg_name("subject"),
-        )
-    """
-
-    def _resolver(args: dict[str, Any]) -> str:
-        return str(args.get(arg_name, "")).lower()
-
-    return _resolver
-
-
-def dedup_key_full_args(args: dict[str, Any]) -> str:
-    """Resolver that collapses calls only when **every** argument is identical.
-
-    Safe default for tools where no single field uniquely identifies a call
-    (e.g. MCP tools whose first required field is a shared workspace id).
-    """
-
-    try:
-        return json.dumps(args, sort_keys=True, default=str)
-    except (TypeError, ValueError):
-        return repr(sorted(args.items())) if isinstance(args, dict) else repr(args)
-
-
-# Backwards-compatible alias for code that imported the original
-# private name. New callers should use :func:`wrap_dedup_key_by_arg_name`.
-_wrap_string_key = wrap_dedup_key_by_arg_name
-

 class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    """Remove duplicate HITL tool calls from a single LLM response.
@ -84,9 +45,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]

    The dedup-resolver map is built from two sources, in priority order:

-    1. ``tool.metadata["dedup_key"]`` — callable provided by the registry's
-       ``ToolDefinition.dedup_key``. Receives the args dict and returns
-       a string signature. This is the canonical mechanism.
+    1. ``tool.metadata["dedup_key"]`` — callable that receives the args dict
+       and returns a string signature. This is the canonical mechanism.
    2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg
       name; primarily used by MCP / Composio tools.
    """
@ -162,3 +122,7 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]

        updated_msg = last_msg.model_copy(update={"tool_calls": deduped})
        return {"messages": [updated_msg]}
+
+
+def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
+    return DedupHITLToolCallsMiddleware(agent_tools=list(tools))
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/__init__.py
@ -0,0 +1,9 @@
+"""Doom-loop middleware: detect repeated identical tool calls (impl + builder)."""
+
+from .builder import build_doom_loop_mw
+from .middleware import DoomLoopMiddleware
+
+__all__ = [
+    "DoomLoopMiddleware",
+    "build_doom_loop_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/builder.py
@ -2,10 +2,10 @@

 from __future__ import annotations

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import DoomLoopMiddleware
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import DoomLoopMiddleware


 def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/middleware.py
@ -16,7 +16,7 @@ This ships **OFF by default** until the frontend explicitly handles
 ``context.permission == "doom_loop"`` interrupts.

 Wire format: uses SurfSense's existing ``interrupt()`` payload shape
-(see ``app/agents/new_chat/tools/hitl.py``):
+(see ``app/agents/shared/tools/hitl.py``):

    {
        "type": "permission_ask",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/__init__.py
@ -0,0 +1,13 @@
+"""End-of-turn KB persistence middleware (main-agent only)."""
+
+from .builder import build_kb_persistence_mw
+from .middleware import (
+    KnowledgeBasePersistenceMiddleware,
+    commit_staged_filesystem_state,
+)
+
+__all__ = [
+    "KnowledgeBasePersistenceMiddleware",
+    "build_kb_persistence_mw",
+    "commit_staged_filesystem_state",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/builder.py
@ -2,8 +2,11 @@

 from __future__ import annotations

-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+
+from .middleware import (
+    KnowledgeBasePersistenceMiddleware,
+)


 def build_kb_persistence_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
@ -1,33 +1,19 @@
 """End-of-turn persistence for the cloud-mode SurfSense filesystem.

-This middleware runs ``aafter_agent`` once per turn (cloud only). It commits
-all staged folder creations, file moves, content writes/edits, file deletes
-(``rm``), and directory deletes (``rmdir``) to Postgres in a single ordered
-pass:
+Runs ``aafter_agent`` once per turn (cloud only), committing staged folder
+creates, moves, writes/edits, and ``rm``/``rmdir`` to Postgres in one ordered
+pass. Order matters: moves resolve before writes (so write-then-move lands at
+the final path), and file deletes run before directory deletes (so a same-turn
+``rm /a/x.md`` + ``rmdir /a`` works).

-1. Materialize ``staged_dirs`` into ``Folder`` rows.
-2. Apply ``pending_moves`` in order (chained moves resolved via
-   ``doc_id_by_path``).
-3. Normalize ``dirty_paths`` through ``pending_moves`` so write-then-move
-   sequences commit at the final path. Paths queued for ``rm`` this turn
-   are dropped here so a write+rm sequence doesn't recreate the doc.
-4. Commit content writes / edits for ``/documents/*`` paths, skipping
-   ``temp_*`` basenames.
-5. Apply ``pending_deletes`` (``rm``) — file deletes run BEFORE directory
-   deletes so a same-turn ``rm /a/x.md`` + ``rmdir /a`` sequence works.
-6. Apply ``pending_dir_deletes`` (``rmdir``); re-verifies emptiness against
-   the post-step-5 DB state.
+When ``flags.enable_action_log`` is on, each mutating op also snapshots a
+``DocumentRevision`` / ``FolderRevision`` for revert (best-effort, except that
+for ``rm``/``rmdir`` the snapshot and DELETE share a SAVEPOINT, so a failed
+snapshot aborts the delete rather than making the data silently irreversible).

-When ``flags.enable_action_log`` is on every destructive op also writes a
-``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
-originating ``AgentActionLog`` row via ``tool_call_id``. ``rm``/``rmdir``
-share a single ``SAVEPOINT`` with their snapshot — if the snapshot fails
-the DELETE rolls back and we surface the error rather than silently
-making the data irreversible.
-
-The commit body is exposed as a free function ``commit_staged_filesystem_state``
-so the optional stream-task fallback (``stream_new_chat.py``) can call the
-exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect).
+The commit body is a free function (``commit_staged_filesystem_state``) so the
+stream-task fallback can run the identical routine when ``aafter_agent`` was
+skipped (e.g. client disconnect).
 """

 from __future__ import annotations
@ -40,22 +26,28 @@ from typing import Any
 from fractional_indexing import generate_key_between
 from langchain.agents.middleware import AgentMiddleware, AgentState
 from langchain_core.callbacks import adispatch_custom_event, dispatch_custom_event
+from langgraph.config import get_config
 from langgraph.runtime import Runtime
 from sqlalchemy import delete, select, update
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.agents.new_chat.feature_flags import get_flags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
-from app.agents.new_chat.path_resolver import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.receipts.receipt import (
+    Receipt,
+    make_receipt,
+)
+from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
+    SurfSenseFilesystemState,
+)
+from app.agents.chat.multi_agent_chat.shared.state.reducers import _CLEAR
+from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    parse_documents_path,
    safe_folder_segment,
    virtual_path_to_doc,
 )
-from app.agents.new_chat.state_reducers import _CLEAR
-from app.agents.shared.receipt import Receipt, make_receipt
 from app.db import (
    AgentActionLog,
    Chunk,
@ -211,11 +203,9 @@ async def _create_document(
        virtual_path,
        search_space_id,
    )
-    # Filesystem-parity invariant: the only thing that *must* be unique is
-    # the path. Two notes can legitimately share content (e.g. ``cp a b``).
-    # Guard against the path-derived ``unique_identifier_hash`` constraint
-    # so we surface a clean ValueError instead of letting the INSERT poison
-    # the session with an IntegrityError.
+    # Pre-check the path-derived unique_identifier_hash so a duplicate path
+    # surfaces as a clean ValueError instead of an INSERT IntegrityError that
+    # poisons the session. Content is intentionally not unique (e.g. ``cp a b``).
    path_collision = await session.execute(
        select(Document.id).where(
            Document.search_space_id == search_space_id,
@ -227,13 +217,6 @@ async def _create_document(
            f"a document already exists at path '{virtual_path}' "
            "(unique_identifier_hash collision)"
        )
-    # ``content_hash`` is intentionally NOT checked for uniqueness here.
-    # In a real filesystem two files at different paths can hold identical
-    # bytes, and the agent's ``write_file`` path needs that semantic to
-    # support copy/duplicate operations. The hash remains useful as a
-    # change-detection hint for connector indexers, which still consult it
-    # via :func:`check_duplicate_document` but do so with a non-unique
-    # lookup (``.first()``).
    content_hash = generate_content_hash(content, search_space_id)
    doc = Document(
        title=title,
@ -430,15 +413,9 @@ async def _mark_action_reversible(
 ) -> None:
    """Flip ``agent_action_log.reversible = TRUE`` for ``action_id``.

-    Best-effort: caller may invoke from inside a SAVEPOINT and treat
-    failure as a soft demotion (snapshot persists, just no Revert button).
-
-    Callers should also call ``_dispatch_reversibility_update`` (defined
-    below) AFTER the enclosing SAVEPOINT block exits successfully so the
-    chat tool card can light up its Revert button without
-    re-fetching ``GET /threads/.../actions``. Dispatching from inside the
-    SAVEPOINT would risk emitting "reversible=true" for rows whose
-    update gets rolled back if the surrounding destructive op fails.
+    Pair with ``_dispatch_reversibility_update`` only *after* the enclosing
+    SAVEPOINT is released, so the UI never sees ``reversible=true`` for a row
+    whose update later rolls back.
    """
    if action_id is None:
        return
@ -450,22 +427,11 @@ async def _mark_action_reversible(


 async def _dispatch_reversibility_update(action_id: int | None) -> None:
-    """Best-effort dispatch of an ``action_log_updated`` custom event.
+    """Emit an ``action_log_updated`` SSE event so the Revert button lights up.

-    Surfaces the post-SAVEPOINT reversibility flip to the SSE layer so
-    the chat tool card can flip its Revert button live. Defensive:
-    failures are logged at debug level and swallowed; the
-    REST endpoint ``GET /threads/.../actions`` is still authoritative.
-
-    .. warning::
-        Inside :func:`commit_staged_filesystem_state` we DEFER all
-        dispatches until the outer ``session.commit()`` succeeds — see
-        the ``deferred_dispatches`` queue in that function. Dispatching
-        from inside a SAVEPOINT block while the outer transaction is
-        still pending would emit ``reversible=true`` for rows whose
-        snapshots get rolled back if the outer commit fails. Direct
-        callers (e.g. the optional stream-task fallback) that own the
-        full session lifetime can still call this helper inline.
+    Best-effort (failures swallowed; the REST actions endpoint is
+    authoritative). Inside :func:`commit_staged_filesystem_state` this is
+    deferred until after the outer commit via ``deferred_dispatches``.
    """
    if action_id is None:
        return
@ -484,12 +450,9 @@ async def _dispatch_reversibility_update(action_id: int | None) -> None:
 # ---------------------------------------------------------------------------
 # Snapshot helpers
 # ---------------------------------------------------------------------------
-#
-# Best-effort helpers swallow + log so a snapshot failure can never break
-# the destructive op for non-destructive tools (write/edit/move/mkdir).
-# Strict helpers run inside the SAME ``begin_nested()`` SAVEPOINT as the
-# destructive DELETE — failure aborts the savepoint and leaves the doc /
-# folder intact, so revertable ops never become irreversible silently.
+# Best-effort variants (write/edit/move/mkdir) swallow failures. Strict
+# variants (rm/rmdir) share the destructive op's SAVEPOINT so a snapshot
+# failure aborts the delete instead of making it silently irreversible.


 def _doc_revision_payload(
@ -699,15 +662,9 @@ async def commit_staged_filesystem_state(
 ) -> dict[str, Any] | None:
    """Commit all staged filesystem changes; return the state delta for reducers.

-    Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent`
-    and the optional stream-task fallback.
-
-    When ``flags.enable_action_log`` is on every destructive op also writes
-    a ``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
-    originating ``AgentActionLog`` row via ``tool_call_id``. Snapshot
-    durability is best-effort for non-destructive ops and STRICT for
-    ``rm``/``rmdir`` (snapshot + DELETE share a SAVEPOINT — snapshot
-    failure aborts the delete).
+    Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent` and
+    the stream-task fallback. See the module docstring for ordering and the
+    action-log snapshot/revert semantics.
    """
    if filesystem_mode != FilesystemMode.CLOUD:
        return None
@ -766,8 +723,7 @@ async def commit_staged_filesystem_state(
    flags = get_flags()
    snapshot_enabled = flags.enable_action_log

-    # De-duplicate pending deletes per-path while preserving the latest
-    # tool_call_id (the one the user is most likely to revert via the UI).
+    # De-dup deletes per-path, keeping the latest tool_call_id (likeliest revert).
    file_delete_paths: dict[str, str] = {}
    for entry in pending_deletes:
        if not isinstance(entry, dict):
@ -791,22 +747,14 @@ async def commit_staged_filesystem_state(
    applied_moves: list[dict[str, Any]] = []
    doc_id_path_tombstones: dict[str, int | None] = {}
    tree_changed = False
-    # Reversibility-flip dispatches are deferred until AFTER the outer
-    # ``session.commit()`` succeeds. Dispatching from inside the
-    # SAVEPOINT chain while the outer transaction is still pending
-    # would emit ``reversible=true`` for rows whose snapshots get rolled
-    # back if the final commit raises. Snapshot helpers append on
-    # success; we drain this list after commit and silently abandon it
-    # on rollback so the UI stays consistent with durable state.
+    # Reversibility-flip dispatches are drained only after the outer commit
+    # succeeds (and abandoned on rollback), so the UI never sees reversible=true
+    # for a snapshot that didn't durably land.
    deferred_dispatches: list[int] = []

    try:
        async with shielded_async_session() as session:
-            # ------------------------------------------------------------------
-            # Resolve action-id bindings up front. One SELECT per turn for all
-            # tool_call_ids, NOT one per op — important because a turn that
-            # touches 50 paths would otherwise issue 50 lookups.
-            # ------------------------------------------------------------------
+            # Resolve all action-id bindings in one SELECT per turn, not per op.
            action_id_by_call: dict[str, int] = {}
            if snapshot_enabled and thread_id is not None:
                tool_call_ids: set[str] = set()
@ -839,10 +787,7 @@ async def commit_staged_filesystem_state(
                next(iter(action_id_by_call), None) if action_id_by_call else None
            )

-            # ------------------------------------------------------------------
-            # 1. staged_dirs -> Folder rows. Snapshot post-flush so the new
-            # folder_id is available for the FK.
-            # ------------------------------------------------------------------
+            # 1. staged_dirs -> Folder rows (snapshot post-flush for the FK).
            for folder_path in staged_dirs:
                if not isinstance(folder_path, str):
                    continue
@ -863,7 +808,6 @@ async def commit_staged_filesystem_state(
                    tcid = staged_dir_tool_calls.get(folder_path)
                    action_id = _action_id_for(tcid)
                    if action_id is not None:
-                        # Re-read the folder for the snapshot.
                        result = await session.execute(
                            select(Folder).where(Folder.id == folder_id)
                        )
@ -878,16 +822,13 @@ async def commit_staged_filesystem_state(
                                deferred_dispatches=deferred_dispatches,
                            )

-            # ------------------------------------------------------------------
-            # 2. pending_moves. Snapshot pre-move (in-place restore on revert).
-            # ------------------------------------------------------------------
+            # 2. pending_moves (snapshot pre-move for in-place restore on revert).
            for move in pending_moves:
                source = str(move.get("source") or "")
                if snapshot_enabled and source:
                    tcid = str(move.get("tool_call_id") or "")
                    action_id = _action_id_for(tcid)
                    if action_id is not None:
-                        # Resolve the doc to snapshot BEFORE we mutate it.
                        doc_id_pre = doc_id_by_path.get(source)
                        document_pre: Document | None = None
                        if doc_id_pre is not None:
@ -937,10 +878,8 @@ async def commit_staged_filesystem_state(
                    path = move_alias[path]
                return path

-            # ------------------------------------------------------------------
-            # 3. dirty_paths -> writes/edits. Skip any path queued for ``rm``
-            # this turn so a write+rm sequence doesn't recreate the doc.
-            # ------------------------------------------------------------------
+            # 3. dirty_paths -> writes/edits. Paths queued for rm this turn are
+            # skipped so a write+rm sequence doesn't recreate the doc.
            kb_dirty_seen: set[str] = set()
            kb_dirty: list[str] = []
            kb_dirty_origin: dict[str, str] = {}
@ -969,9 +908,7 @@ async def commit_staged_filesystem_state(
                    continue
                content = "\n".join(file_data.get("content") or [])
                doc_id = doc_id_by_path.get(path)
-                # Path ↔ tool_call_id binding: the dirty_paths list dedupes via
-                # _add_unique_reducer, so we look up the latest tool_call_id by
-                # path (or by the un-renamed origin).
+                # Look up tool_call_id by final path or its pre-rename origin.
                origin = kb_dirty_origin.get(path, path)
                tcid = dirty_path_tool_calls.get(path) or dirty_path_tool_calls.get(
                    origin
@ -979,12 +916,9 @@ async def commit_staged_filesystem_state(
                action_id = _action_id_for(tcid)

                if doc_id is None:
-                    # The in-memory ``doc_id_by_path`` is per-thread and starts
-                    # empty in every new chat. If the agent writes to a path
-                    # that already exists in the DB (e.g. a previous chat's
-                    # ``notes.md``), we must NOT try to INSERT — it would hit
-                    # ``unique_identifier_hash`` (path-derived). Look up the
-                    # existing doc and update it in place instead.
+                    # doc_id_by_path is per-thread and empty in a new chat, so a
+                    # write to a path already in the DB must update in place, not
+                    # INSERT (which would hit the path-derived unique hash).
                    existing = await virtual_path_to_doc(
                        session,
                        search_space_id=search_space_id,
@ -1033,12 +967,9 @@ async def commit_staged_filesystem_state(
                            }
                        )
                else:
-                    # Fresh create. Wrap each create in a SAVEPOINT so a
-                    # residual ``IntegrityError`` (e.g. a deployment that
-                    # hasn't run migration 133 yet, where
-                    # ``documents.content_hash`` still carries its legacy
-                    # global UNIQUE constraint) rolls back only this one
-                    # create instead of poisoning the whole turn.
+                    # Fresh create, wrapped in a SAVEPOINT so a residual
+                    # IntegrityError (e.g. pre-migration-133 content_hash UNIQUE)
+                    # rolls back only this create, not the whole turn.
                    placeholder_revision_id: int | None = None
                    if snapshot_enabled and action_id is not None:
                        placeholder_revision_id = await _snapshot_document_pre_create(
@ -1061,8 +992,7 @@ async def commit_staged_filesystem_state(
                        logger.warning(
                            "kb_persistence: skipping %s create: %s", path, exc
                        )
-                        # Roll back the placeholder revision since the create
-                        # never happened.
+                        # Create never happened; drop its placeholder revision.
                        if placeholder_revision_id is not None:
                            await session.execute(
                                delete(DocumentRevision).where(
@ -1109,19 +1039,14 @@ async def commit_staged_filesystem_state(
                    )
                    tree_changed = True

-            # ------------------------------------------------------------------
-            # 4. pending_deletes -> ``rm``. STRICT durability: snapshot + DELETE
-            # share a SAVEPOINT. If the snapshot insert fails, the DELETE
-            # rolls back too and we surface the error rather than silently
-            # making the data irreversible.
-            # ------------------------------------------------------------------
+            # 4. pending_deletes -> rm. Strict: snapshot + DELETE share a
+            # SAVEPOINT, so a failed snapshot rolls the delete back too.
            for raw_path, tcid in file_delete_paths.items():
                final = _final_path(raw_path)
                if not final.startswith(DOCUMENTS_ROOT + "/"):
                    continue
                action_id = _action_id_for(tcid)

-                # Resolve the doc.
                doc_id_for_delete = doc_id_by_path.get(final)
                document_to_delete: Document | None = None
                if doc_id_for_delete is not None:
@ -1150,7 +1075,6 @@ async def commit_staged_filesystem_state(

                try:
                    async with session.begin_nested():
-                        # Strict: snapshot first; failure aborts the delete.
                        if snapshot_enabled and action_id is not None:
                            chunks = await _load_chunks_for_snapshot(
                                session, doc_id=doc_pk
@ -1179,10 +1103,7 @@ async def commit_staged_filesystem_state(
                    )
                    continue

-                # B1 — SAVEPOINT released. Defer the reversibility-flip
-                # dispatch until AFTER the outer commit succeeds so we
-                # never tell the UI a row is reversible if its snapshot
-                # gets rolled back.
+                # Defer the reversibility flip until after the outer commit.
                if snapshot_enabled and action_id is not None:
                    deferred_dispatches.append(int(action_id))

@ -1201,11 +1122,8 @@ async def commit_staged_filesystem_state(
                )
                tree_changed = True

-            # ------------------------------------------------------------------
-            # 5. pending_dir_deletes -> ``rmdir``. STRICT durability + final
-            # emptiness check (after step 4's deletes have run, an "empty
-            # mid-turn" directory really IS empty in DB now).
-            # ------------------------------------------------------------------
+            # 5. pending_dir_deletes -> rmdir. Strict, and re-checks emptiness
+            # against post-step-4 DB state.
            for raw_path, tcid in dir_delete_paths.items():
                final = _final_path(raw_path)
                if not final.startswith(DOCUMENTS_ROOT + "/"):
@ -1226,7 +1144,6 @@ async def commit_staged_filesystem_state(
                    )
                    continue

-                # Re-check emptiness against in-DB state.
                docs_in_folder = await session.execute(
                    select(Document.id)
                    .where(Document.folder_id == folder_id)
@ -1291,10 +1208,7 @@ async def commit_staged_filesystem_state(
                    )
                    continue

-                # B1 — SAVEPOINT released. Defer the reversibility-flip
-                # dispatch until AFTER the outer commit succeeds so we
-                # never tell the UI a row is reversible if its snapshot
-                # gets rolled back.
+                # Defer the reversibility flip until after the outer commit.
                if snapshot_enabled and action_id is not None:
                    deferred_dispatches.append(int(action_id))

@ -1314,18 +1228,13 @@ async def commit_staged_filesystem_state(
        logger.exception(
            "kb_persistence: commit failed (search_space=%s)", search_space_id
        )
-        # Outer commit raised — every SAVEPOINT-released change above
-        # (snapshots + reversibility flips) is now rolled back. Drop
-        # the deferred SSE dispatches so the UI stays consistent with
-        # durable state.
+        # Outer commit raised: everything above rolled back, so drop the
+        # deferred dispatches.
        deferred_dispatches.clear()
        return None

-    # Outer commit succeeded; flush deferred reversibility-flip
-    # dispatches now so the chat tool card can light up its Revert
-    # button without re-fetching ``GET /threads/.../actions``. De-dup
-    # to avoid emitting the same id twice (e.g. write-then-rm in the
-    # same turn dispatches once for each snapshot site).
+    # Commit succeeded; flush deferred reversibility flips (de-duped, since
+    # write-then-rm in one turn appends an id per snapshot site).
    if deferred_dispatches and dispatch_events:
        for action_id in dict.fromkeys(deferred_dispatches):
            try:
@ -1371,9 +1280,8 @@ async def commit_staged_filesystem_state(
        p for p in files if isinstance(p, str) and _basename(p).startswith(_TEMP_PREFIX)
    ]

-    # Tombstone every committed-delete path so a stale ``state["files"]`` entry
-    # (which als_info would otherwise interpret as content) cannot survive into
-    # the next turn and make a now-empty folder look non-empty.
+    # Tombstone committed-delete paths so a stale state["files"] entry can't
+    # survive into the next turn and make a now-empty folder look non-empty.
    deleted_file_paths = [
        str(payload.get("virtualPath") or "")
        for payload in committed_deletes
@ -1394,11 +1302,8 @@ async def commit_staged_filesystem_state(
        "dirty_path_tool_calls": {_CLEAR: True},
    }

-    # Emit one Receipt per committed mutation, folded into ``state['receipts']``
-    # via ``_list_append_reducer``. The receipts surface what actually committed
-    # (post-savepoint) rather than what the LLM intended; the orchestrator uses
-    # them as ground truth in the ``<verification>`` teaching. KB writes do not
-    # have public verifiable URLs, so ``verifiable_url`` stays unset.
+    # One Receipt per committed mutation: ground truth (post-savepoint) for the
+    # orchestrator's <verification> teaching. KB writes have no public URL.
    receipts: list[Receipt] = []

    def _kb_receipt(
@ -1439,8 +1344,6 @@ async def commit_staged_filesystem_state(
            external_id=payload.get("id"),
        )
    for payload in applied_moves:
-        # ``applied_moves`` rows carry the destination ``virtualPath`` because
-        # the move has already landed in the DB by the time we reach this code.
        path = str(payload.get("virtualPath") or "")
        _kb_receipt(
            type="file",
@ -1480,9 +1383,7 @@ async def commit_staged_filesystem_state(
    if tree_changed:
        delta["tree_version"] = int(state_dict.get("tree_version") or 0) + 1

-    # Avoid 'unused' lint when turn_id_for_revision was only useful for
-    # diagnostic purposes inside the SAVEPOINT chain above.
-    _ = turn_id_for_revision
+    _ = turn_id_for_revision  # diagnostic-only; silence unused lint

    logger.info(
        "kb_persistence: commit (search_space=%s) creates=%d updates=%d "
@ -1536,9 +1437,33 @@ class KnowledgeBasePersistenceMiddleware(AgentMiddleware):  # type: ignore[type-
            search_space_id=self.search_space_id,
            created_by_id=self.created_by_id,
            filesystem_mode=self.filesystem_mode,
-            thread_id=self.thread_id,
+            thread_id=self._resolve_thread_id(),
        )

+    def _resolve_thread_id(self) -> int | None:
+        """Return the thread id of the run that is currently executing.
+
+        Prefers ``configurable.thread_id`` from :func:`langgraph.config.get_config`
+        so a compiled graph shared across chats commits against the live thread.
+        Uses the constructor-supplied ``self.thread_id`` when no config is
+        available, the config is not a dict, or it carries no thread id. Returns
+        ``None`` when a thread id is present but cannot be converted to ``int``.
+        """
+        try:
+            active_config = get_config()
+        except Exception:
+            return self.thread_id
+        if not isinstance(active_config, dict):
+            return self.thread_id
+        configurable = active_config.get("configurable") or {}
+        raw_thread_id = configurable.get("thread_id")
+        if raw_thread_id is None:
+            return self.thread_id
+        try:
+            return int(raw_thread_id)
+        except (TypeError, ValueError):
+            return None
+

 __all__ = [
    "KnowledgeBasePersistenceMiddleware",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
@ -4,8 +4,10 @@ from __future__ import annotations

 from langchain_core.language_models import BaseChatModel

-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
+    KnowledgePriorityMiddleware,
+)
 from app.services.llm_service import get_planner_llm


@ -17,7 +19,16 @@ def build_knowledge_priority_mw(
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
+    preinjection_enabled: bool = True,
 ) -> KnowledgePriorityMiddleware:
+    """Build the KB priority middleware.
+
+    When ``preinjection_enabled`` is False (the lazy default), the middleware
+    runs in mentions-only mode: it skips the expensive planner LLM + embedding
+    + hybrid search and only surfaces explicit @-mentions. The main agent is
+    expected to pull relevant KB content on demand via the
+    ``search_knowledge_base`` tool instead.
+    """
    return KnowledgePriorityMiddleware(
        llm=llm,
        planner_llm=get_planner_llm(),
@ -27,4 +38,5 @@ def build_knowledge_priority_mw(
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        inject_system_message=False,
+        mentions_only=not preinjection_enabled,
    )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/__init__.py
@ -0,0 +1,9 @@
+"""Knowledge-tree middleware: <workspace_tree> injection, cloud only (impl + builder)."""
+
+from .builder import build_knowledge_tree_mw
+from .middleware import KnowledgeTreeMiddleware
+
+__all__ = [
+    "KnowledgeTreeMiddleware",
+    "build_knowledge_tree_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/builder.py
@ -4,8 +4,9 @@ from __future__ import annotations

 from langchain_core.language_models import BaseChatModel

-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+
+from .middleware import KnowledgeTreeMiddleware


 def build_knowledge_tree_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/middleware.py
@ -33,9 +33,11 @@ from langchain_core.messages import SystemMessage
 from langgraph.runtime import Runtime
 from sqlalchemy import select

-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
-from app.agents.new_chat.path_resolver import (
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
+    SurfSenseFilesystemState,
+)
+from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    PathIndex,
    build_path_index,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/__init__.py
@ -0,0 +1,5 @@
+"""User/team memory injection middleware (main-agent only)."""
+
+from .builder import build_memory_mw
+
+__all__ = ["build_memory_mw"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/builder.py
@ -2,9 +2,10 @@

 from __future__ import annotations

-from app.agents.new_chat.middleware import MemoryInjectionMiddleware
 from app.db import ChatVisibility

+from .middleware import MemoryInjectionMiddleware
+

 def build_memory_mw(
    *,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/__init__.py
@ -0,0 +1,9 @@
+"""Noop-injection middleware: provider-compat _noop tool (impl + builder)."""
+
+from .builder import build_noop_injection_mw
+from .middleware import NoopInjectionMiddleware
+
+__all__ = [
+    "NoopInjectionMiddleware",
+    "build_noop_injection_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/builder.py
@ -2,10 +2,10 @@

 from __future__ import annotations

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import NoopInjectionMiddleware
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import NoopInjectionMiddleware


 def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/__init__.py
@ -0,0 +1,9 @@
+"""OTel-span middleware: spans on model and tool calls (impl + builder)."""
+
+from .builder import build_otel_mw
+from .middleware import OtelSpanMiddleware
+
+__all__ = [
+    "OtelSpanMiddleware",
+    "build_otel_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/builder.py
@ -2,10 +2,10 @@

 from __future__ import annotations

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import OtelSpanMiddleware
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import OtelSpanMiddleware


 def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/middleware.py
@ -24,6 +24,7 @@ from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import AIMessage, ToolMessage

 from app.observability import metrics as ot_metrics, otel as ot
+from app.utils.perf import get_perf_logger

 if TYPE_CHECKING:  # pragma: no cover — type-only
    from langchain.agents.middleware.types import (
@ -34,6 +35,7 @@ if TYPE_CHECKING:  # pragma: no cover — type-only
    from langgraph.types import Command

 logger = logging.getLogger(__name__)
+_perf_log = get_perf_logger()


 class OtelSpanMiddleware(AgentMiddleware):
@ -60,7 +62,23 @@ class OtelSpanMiddleware(AgentMiddleware):
        handler: Callable[[ModelRequest], Awaitable[ModelResponse | AIMessage | Any]],
    ) -> ModelResponse | AIMessage | Any:
        if not ot.is_enabled():
-            return await handler(request)
+            # Always emit a [PERF] line for the model step even when OTel is
+            # disabled. This isolates provider/model latency from the agent's
+            # pre-flight (before_agent KB-priority/memory/tree) work, which is
+            # the usual culprit when the multi-agent path feels slow to start.
+            # ``perf_counter`` at entry doubles as the "before_agent finished /
+            # model call started" marker on the first step of a turn.
+            model_id, _provider = _resolve_model_attrs(request)
+            _t0 = time.perf_counter()
+            _perf_log.info("[model_call] start model=%s", model_id)
+            try:
+                return await handler(request)
+            finally:
+                _perf_log.info(
+                    "[model_call] done model=%s elapsed=%.3fs",
+                    model_id,
+                    time.perf_counter() - _t0,
+                )

        model_id, provider = _resolve_model_attrs(request)
        t0 = time.perf_counter()
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/plugins.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/plugins.py
@ -7,15 +7,15 @@ from typing import Any

 from langchain_core.language_models import BaseChatModel

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.plugin_loader import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
+from app.db import ChatVisibility
+
+from ..plugins.loader import (
    PluginContext,
    load_allowed_plugin_names_from_env,
    load_plugin_middlewares,
 )
-from app.db import ChatVisibility
-
-from ..shared.flags import enabled


 def build_plugin_middlewares(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/skills.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/skills.py
@ -6,14 +6,11 @@ import logging

 from deepagents.middleware.skills import SkillsMiddleware

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import (
-    build_skills_backend_factory,
-    default_skills_sources,
-)
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from ..skills.backends import build_skills_backend_factory, default_skills_sources


 def build_skills_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@ -0,0 +1,314 @@
+"""Main-agent middleware list assembly: one line per slot.
+
+The main agent is a pure router — filesystem reads/writes are owned by the
+``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
+here only renders KB context (workspace tree + priority docs), projects it
+into system messages, and commits any subagent-side staged writes at end of
+turn (cloud mode).
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from collections.abc import Sequence
+from typing import Any, cast
+
+from deepagents import SubAgent
+from deepagents.backends import StateBackend
+from langchain.agents import create_agent
+from langchain_core.language_models import BaseChatModel
+from langchain_core.runnables import Runnable
+from langchain_core.tools import BaseTool
+from langgraph.types import Checkpointer
+
+from app.agents.chat.multi_agent_chat.main_agent.middleware.memory import (
+    build_memory_mw,
+)
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
+    build_anthropic_cache_mw,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
+    build_compaction_mw,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
+    build_kb_context_projection_mw,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
+    build_patch_tool_calls_mw,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.resilience import (
+    build_resilience_middlewares,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.todos import build_todos_mw
+from app.agents.chat.multi_agent_chat.shared.permissions import (
+    build_permission_mw,
+)
+from app.agents.chat.multi_agent_chat.subagents import (
+    build_subagents,
+    get_subagents_to_exclude,
+)
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.agent import (
+    NAME as KB_WRITE_NAME,
+    READONLY_NAME as KB_READONLY_NAME,
+    build_readonly_subagent as build_kb_readonly_subagent,
+    build_subagent as build_kb_write_subagent,
+)
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.ask_knowledge_base_tool import (
+    build_ask_knowledge_base_tool,
+)
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.prompts import (
+    load_description as load_kb_write_description,
+)
+from app.agents.chat.multi_agent_chat.subagents.middleware_stack import (
+    build_subagent_middleware_stack,
+)
+from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
+    SURF_LAZY_SPEC_FACTORY_KEY,
+)
+from app.db import ChatVisibility
+from app.utils.perf import get_perf_logger
+
+from .action_log import build_action_log_mw
+from .anonymous_document import build_anonymous_doc_mw
+from .busy_mutex import build_busy_mutex_mw
+from .checkpointed_subagent_middleware import (
+    SurfSenseCheckpointedSubAgentMiddleware,
+)
+from .checkpointed_subagent_middleware.task_description import (
+    TASK_TOOL_DESCRIPTION,
+)
+from .context_editing import build_context_editing_mw
+from .dedup_hitl import build_dedup_hitl_mw
+from .doom_loop import build_doom_loop_mw
+from .kb_persistence import build_kb_persistence_mw
+from .knowledge_priority import build_knowledge_priority_mw
+from .knowledge_tree import build_knowledge_tree_mw
+from .noop_injection import build_noop_injection_mw
+from .otel_span import build_otel_mw
+from .plugins import build_plugin_middlewares
+from .skills import build_skills_mw
+from .tool_call_repair import build_repair_mw
+
+_perf_log = get_perf_logger()
+
+
+def build_main_agent_deepagent_middleware(
+    *,
+    llm: BaseChatModel,
+    tools: Sequence[BaseTool],
+    backend_resolver: Any,
+    filesystem_mode: FilesystemMode,
+    search_space_id: int,
+    user_id: str | None,
+    thread_id: int | None,
+    visibility: ChatVisibility,
+    anon_session_id: str | None,
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+    mentioned_document_ids: list[int] | None,
+    max_input_tokens: int | None,
+    flags: AgentFeatureFlags,
+    subagent_dependencies: dict[str, Any],
+    checkpointer: Checkpointer,
+    mcp_tools_by_agent: dict[str, list[BaseTool]] | None = None,
+    disabled_tools: list[str] | None = None,
+) -> list[Any]:
+    """Ordered middleware for ``create_agent`` (None entries already stripped).
+
+    Assembles every middleware slot in a fixed order and drops the entries
+    that are ``None``. The read-only knowledge-base graph and the write
+    ``knowledge_base`` subagent spec are not built here: both are wrapped in
+    factories (``_compile_kb_readonly`` / ``_build_kb_write_spec``) that are
+    handed to the tool / subagent registry and run on first use. Timings for
+    each phase are emitted as one ``[stack_build]`` perf-log line.
+
+    Args:
+        llm: Chat model handed to the subagent builders and to every
+            middleware builder that takes ``llm``.
+        tools: Main-agent tools; forwarded to the context-editing, tool-call
+            repair and dedup-HITL builders.
+        subagent_dependencies: Base dependency mapping for subagent builders.
+            It is copied (not mutated) and extended with ``backend_resolver``,
+            ``filesystem_mode`` and ``flags``.
+        checkpointer: Used for the lazily compiled read-only KB graph and the
+            checkpointed subagent middleware.
+        mcp_tools_by_agent: Per-subagent MCP tools; ``None`` is treated as ``{}``.
+        disabled_tools: Tool names forwarded to ``build_subagents`` and
+            filtered out of the lazily built KB write spec.
+
+    The remaining keyword arguments are passed through unchanged to the
+    individual middleware builders.
+
+    Returns:
+        The middleware instances in stack order, without ``None`` entries.
+    """
+    stack_build_start = time.perf_counter()
+    resilience = build_resilience_middlewares(flags)
+
+    memory_mw = build_memory_mw(
+        user_id=user_id,
+        search_space_id=search_space_id,
+        visibility=visibility,
+    )
+
+    # Rebind to a fresh dict so the caller's mapping is never mutated.
+    subagent_dependencies = {
+        **subagent_dependencies,
+        "backend_resolver": backend_resolver,
+        "filesystem_mode": filesystem_mode,
+        "flags": flags,
+    }
+    shared_mw_start = time.perf_counter()
+    shared_subagent_middleware = build_subagent_middleware_stack(
+        resilience=resilience,
+        flags=flags,
+    )
+    shared_mw_elapsed = time.perf_counter() - shared_mw_start
+
+    def _compile_kb_readonly() -> Runnable:
+        """Build *and* compile the read-only KB graph on first ``ask_knowledge_base`` use.
+
+        Both the spec build (``build_kb_readonly_subagent`` — middleware +
+        tool-schema construction, ~the same cost as one regular subagent) and
+        the ``create_agent`` compile are deferred here (memoized by
+        ``build_ask_knowledge_base_tool``) so neither is paid on the cold
+        agent-build / TTFT path; most first turns never call a subagent.
+        """
+        build_start = time.perf_counter()
+        kb_readonly_spec = build_kb_readonly_subagent(
+            dependencies=subagent_dependencies,
+            model=llm,
+            middleware_stack=shared_subagent_middleware,
+        ).spec
+        runnable = create_agent(
+            llm,
+            system_prompt=kb_readonly_spec["system_prompt"],
+            tools=kb_readonly_spec["tools"],
+            middleware=kb_readonly_spec["middleware"],
+            name=KB_READONLY_NAME,
+            checkpointer=checkpointer,
+        )
+        _perf_log.info(
+            "[subagent_compile_lazy] name=%s (spec+compile) in %.3fs",
+            KB_READONLY_NAME,
+            time.perf_counter() - build_start,
+        )
+        return runnable
+
+    ask_kb_tool = build_ask_knowledge_base_tool(_compile_kb_readonly)
+
+    def _build_kb_write_spec() -> dict[str, Any]:
+        """Build the *write* knowledge_base subagent spec on first ``task`` use.
+
+        The KB filesystem middleware builds ~13 tool schemas at ~150ms each
+        (~2s total), all of which used to land on the cold agent-build / TTFT
+        path even though ``task("knowledge_base")`` is essentially never the
+        first thing a turn does. Deferring the whole spec build here (memoized
+        by the checkpointed subagent middleware) moves that cost to the first
+        actual KB-write delegation. Captures the same ``subagent_dependencies``
+        the eager build would have used, so cross-thread cache behaviour is
+        unchanged.
+        """
+        spec = build_kb_write_subagent(
+            dependencies=subagent_dependencies,
+            model=llm,
+            middleware_stack=shared_subagent_middleware,
+        ).spec
+        if disabled_tools:
+            disabled = frozenset(disabled_tools)
+            # Named ``spec_tools`` so it does not shadow the enclosing
+            # function's ``tools`` parameter (the main-agent tool list).
+            spec_tools = spec.get("tools")  # type: ignore[typeddict-item]
+            if isinstance(spec_tools, list):
+                spec["tools"] = [  # type: ignore[typeddict-unknown-key]
+                    t for t in spec_tools if getattr(t, "name", None) not in disabled
+                ]
+        return cast(dict[str, Any], spec)
+
+    subagents_start = time.perf_counter()
+    # The write knowledge_base subagent is excluded from the eager build and
+    # registered as a lazy descriptor (name + description cheap; spec built on
+    # first ``task("knowledge_base")`` use) — see ``_build_kb_write_spec``.
+    exclude_names = [*get_subagents_to_exclude(available_connectors), KB_WRITE_NAME]
+    subagents: list[SubAgent] = build_subagents(
+        dependencies=subagent_dependencies,
+        model=llm,
+        middleware_stack=shared_subagent_middleware,
+        mcp_tools_by_agent=mcp_tools_by_agent or {},
+        exclude=exclude_names,
+        disabled_tools=disabled_tools,
+        ask_kb_tool=ask_kb_tool,
+    )
+    kb_write_descriptor = cast(
+        SubAgent,
+        {
+            "name": KB_WRITE_NAME,
+            "description": load_kb_write_description(),
+            SURF_LAZY_SPEC_FACTORY_KEY: _build_kb_write_spec,
+        },
+    )
+    subagents.append(kb_write_descriptor)
+    subagents_elapsed = time.perf_counter() - subagents_start
+    # Log through this module's logger rather than the root logger:
+    # ``logging.debug`` ignores per-module level/handler configuration and
+    # may implicitly call ``logging.basicConfig()``.
+    logging.getLogger(__name__).debug(
+        "Subagents registry: %s", [s["name"] for s in subagents]
+    )
+
+    assembly_start = time.perf_counter()
+    stack: list[Any] = [
+        build_busy_mutex_mw(flags),
+        build_otel_mw(flags),
+        build_todos_mw(system_prompt=""),
+        memory_mw,
+        build_anonymous_doc_mw(
+            filesystem_mode=filesystem_mode, anon_session_id=anon_session_id
+        ),
+        build_knowledge_tree_mw(
+            filesystem_mode=filesystem_mode,
+            search_space_id=search_space_id,
+            llm=llm,
+        ),
+        build_knowledge_priority_mw(
+            llm=llm,
+            search_space_id=search_space_id,
+            filesystem_mode=filesystem_mode,
+            available_connectors=available_connectors,
+            available_document_types=available_document_types,
+            mentioned_document_ids=mentioned_document_ids,
+            preinjection_enabled=flags.enable_kb_priority_preinjection,
+        ),
+        build_kb_context_projection_mw(),
+        build_kb_persistence_mw(
+            filesystem_mode=filesystem_mode,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            thread_id=thread_id,
+        ),
+        build_skills_mw(
+            flags=flags,
+            filesystem_mode=filesystem_mode,
+            search_space_id=search_space_id,
+        ),
+        SurfSenseCheckpointedSubAgentMiddleware(
+            checkpointer=checkpointer,
+            backend=StateBackend,
+            subagents=subagents,
+            system_prompt=None,
+            task_description=TASK_TOOL_DESCRIPTION,
+            search_space_id=search_space_id,
+        ),
+        resilience.model_call_limit,
+        resilience.tool_call_limit,
+        build_context_editing_mw(
+            flags=flags,
+            max_input_tokens=max_input_tokens,
+            tools=tools,
+            backend_resolver=backend_resolver,
+        ),
+        build_compaction_mw(llm),
+        build_noop_injection_mw(flags),
+        resilience.retry,
+        resilience.fallback,
+        build_repair_mw(flags=flags, tools=tools),
+        build_permission_mw(flags=flags),
+        build_doom_loop_mw(flags),
+        build_action_log_mw(
+            flags=flags,
+            thread_id=thread_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        ),
+        build_patch_tool_calls_mw(),
+        build_dedup_hitl_mw(tools),
+        *build_plugin_middlewares(
+            flags=flags,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            visibility=visibility,
+            llm=llm,
+        ),
+        build_anthropic_cache_mw(),
+    ]
+    # Builders return ``None`` for disabled slots; strip those here.
+    result = [m for m in stack if m is not None]
+    assembly_elapsed = time.perf_counter() - assembly_start
+    _perf_log.info(
+        "[stack_build] total=%.3fs shared_subagent_mw=%.3fs "
+        "build_subagents=%.3fs stack_assembly=%.3fs subagents=%d mw=%d "
+        "(kb_readonly deferred to first ask_knowledge_base)",
+        time.perf_counter() - stack_build_start,
+        shared_mw_elapsed,
+        subagents_elapsed,
+        assembly_elapsed,
+        len(subagents),
+        len(result),
+    )
+    return result
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/__init__.py
@ -0,0 +1,9 @@
+"""Tool-call-repair middleware: fix miscased/unknown tool names (impl + builder)."""
+
+from .builder import build_repair_mw
+from .middleware import ToolCallNameRepairMiddleware
+
+__all__ = [
+    "ToolCallNameRepairMiddleware",
+    "build_repair_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/builder.py
@ -6,10 +6,10 @@ from collections.abc import Sequence

 from langchain_core.tools import BaseTool

-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled

-from ..shared.flags import enabled
+from .middleware import ToolCallNameRepairMiddleware

 # deepagents-built-in tool names the repair pass treats as known.
 _DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/middleware.py
@ -34,8 +34,6 @@ from langchain.agents.middleware.types import (
 from langchain_core.messages import AIMessage
 from langgraph.runtime import Runtime

-from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME
-
 logger = logging.getLogger(__name__)


@ -120,6 +118,12 @@ class ToolCallNameRepairMiddleware(
                return call

        # Stage 2 — invalid fallback
+        # Local import keeps the middleware module import-light and avoids any
+        # tools <-> middleware import-order coupling at module scope.
+        from app.agents.chat.multi_agent_chat.main_agent.tools.invalid_tool import (
+            INVALID_TOOL_NAME,
+        )
+
        if INVALID_TOOL_NAME in registered:
            original_args = call.get("args") or {}
            error_msg = (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/loader.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/loader.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/year_substituter.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/year_substituter.py
@ -17,7 +17,7 @@ Wire-up in ``pyproject.toml`` (illustrative; the in-repo plugin doesn't
 need this -- it's already on the import path)::

    [project.entry-points."surfsense.plugins"]
-    year_substituter = "app.agents.new_chat.plugins.year_substituter:make_middleware"
+    year_substituter = "app.agents.chat.multi_agent_chat.main_agent.plugins.year_substituter:make_middleware"
 """

 from __future__ import annotations
@ -34,7 +34,7 @@ if TYPE_CHECKING:  # pragma: no cover - type-only
    from langchain_core.messages import ToolMessage
    from langgraph.types import Command

-    from app.agents.new_chat.plugin_loader import PluginContext
+    from .loader import PluginContext


 logger = logging.getLogger(__name__)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache.py
@ -10,18 +10,18 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer

-from app.agents.new_chat.agent_cache import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
+from app.db import ChatVisibility
+
+from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
+from .agent_cache_store import (
    flags_signature,
    get_cache,
    stable_hash,
    system_prompt_hash,
    tools_signature,
 )
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
-from app.db import ChatVisibility
-
-from ..graph.compile_graph_sync import build_compiled_agent_graph_sync


 def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
@ -91,10 +91,18 @@ async def build_agent_with_cache(
    # Every per-request value any middleware closes over at __init__ must be in
    # the key, otherwise a hit will leak state across threads. Bump the schema
    # version when the component list changes shape.
+    #
+    # Cross-thread reuse: when enabled, ``thread_id`` is dropped from the key so
+    # one compiled graph serves all of a user's (same space/config/visibility)
+    # chats. This is only safe because ActionLog, KB-persistence, and the
+    # deliverables tools now resolve the chat thread from the live
+    # RunnableConfig instead of a constructor closure; the schema tag is bumped
+    # so v2 (per-thread) entries are never confused with v3 (shared) ones.
+    cross_thread = flags.enable_cross_thread_agent_cache
    cache_key = stable_hash(
-        "multi-agent-v2",
+        "multi-agent-v3" if cross_thread else "multi-agent-v2",
        config_id,
-        thread_id,
+        None if cross_thread else thread_id,
        user_id,
        search_space_id,
        visibility,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py
@ -67,13 +67,13 @@ from __future__ import annotations
 import asyncio
 import hashlib
 import logging
-import os
 import time
 from collections import OrderedDict
 from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from typing import Any

+from app.config import config
 from app.utils.perf import get_perf_logger

 logger = logging.getLogger(__name__)
@ -113,12 +113,11 @@ def tools_signature(
      MCP tools loaded for the user changes, gating rules flip, etc.).
    * The available connectors / document types for the search space
      change (new connector added, last connector removed, new document
-      type indexed). Because :func:`get_connector_gated_tools` derives
-      ``modified_disabled_tools`` from ``available_connectors``, the
-      tool surface is technically already covered — but we hash the
-      connector list separately so an empty-list "no tools changed"
-      situation still rotates the key when, say, the user re-adds a
-      connector that gates a tool we were already not exposing.
+      type indexed). Connector gating derives disabled tools from
+      ``available_connectors``, so the tool surface is technically already
+      covered — but we hash the connector list separately so an empty-list
+      "no tools changed" situation still rotates the key when, say, the user
+      re-adds a connector that gates a tool we were already not exposing.

    Stays stable across:

@ -329,8 +328,8 @@ def _short(key: str, n: int = 16) -> str:
 # Module-level singleton
 # ---------------------------------------------------------------------------

-_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
-_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
+_DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE
+_DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS

 _cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/connector_searchable_types.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/connector_searchable_types.py
@ -0,0 +1,100 @@
+"""Map configured connectors to the searchable document/connector types.
+
+This is agent-agnostic infrastructure shared by every agent factory (single-
+and multi-agent). It translates the connectors a search space has enabled into
+the set of searchable type strings that pre-search middleware and ``web_search``
+understand, and always layers in the document types that exist independently of
+any connector (uploads, notes, extension captures, YouTube).
+
+It lives in its own module — rather than inside a specific agent factory — so
+that retiring or moving any single agent never disturbs the others' access to
+this mapping.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+# Maps SearchSourceConnectorType enum values to the searchable document/connector types
+# used by pre-search middleware and web_search.
+# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
+# the web_search tool; all others are considered local/indexed data.
+_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
+    # Live search connectors (handled by web_search tool)
+    "TAVILY_API": "TAVILY_API",
+    "LINKUP_API": "LINKUP_API",
+    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
+    # Local/indexed connectors (handled by KB pre-search middleware)
+    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
+    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
+    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
+    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
+    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
+    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
+    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
+    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
+    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
+    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
+    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
+    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
+    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
+    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
+    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
+    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
+    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
+    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
+    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
+    # Composio connectors (unified to native document types).
+    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
+    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Document types that don't come from SearchSourceConnector but should always be searchable
+_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
+    "EXTENSION",  # Browser extension data
+    "FILE",  # Uploaded files
+    "NOTE",  # User notes
+    "YOUTUBE_VIDEO",  # YouTube videos
+]
+
+
+def map_connectors_to_searchable_types(
+    connector_types: list[Any],
+) -> list[str]:
+    """
+    Map SearchSourceConnectorType enums to searchable document/connector types.
+
+    This function:
+    1. Converts connector type enums to their searchable counterparts
+    2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
+    3. Deduplicates while preserving order
+
+    Args:
+        connector_types: List of SearchSourceConnectorType enum values (plain strings are also accepted)
+
+    Returns:
+        List of searchable connector/document type strings
+    """
+    result_set: set[str] = set()
+    result_list: list[str] = []
+
+    # Add always-available document types first
+    for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
+        if doc_type not in result_set:
+            result_set.add(doc_type)
+            result_list.append(doc_type)
+
+    # Map each connector type to its searchable equivalent
+    for ct in connector_types:
+        # Handle both enum and string types
+        ct_str = ct.value if hasattr(ct, "value") else str(ct)
+        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
+        if searchable and searchable not in result_set:
+            result_set.add(searchable)
+            result_list.append(searchable)
+
+    return result_list
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/factory.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/factory.py
@ -12,21 +12,28 @@ from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.agents.multi_agent_chat.subagents import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import (
+    AgentFeatureFlags,
+    get_flags,
+)
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import (
+    FilesystemMode,
+    FilesystemSelection,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.resolver import (
+    build_backend_resolver,
+)
+from app.agents.chat.multi_agent_chat.subagents import (
    get_subagents_to_exclude,
    main_prompt_registry_subagent_lines,
 )
-from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
+from app.agents.chat.multi_agent_chat.subagents.mcp_tools.index import (
    load_mcp_tools_by_connector,
 )
-from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types
-from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
-from app.agents.new_chat.filesystem_backends import build_backend_resolver
-from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
-from app.agents.new_chat.llm_config import AgentConfig
-from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
-from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
-from app.agents.new_chat.tools.registry import build_tools_async
+from app.agents.chat.runtime.llm_config import AgentConfig
+from app.agents.chat.runtime.prompt_caching import (
+    apply_litellm_prompt_caching,
+)
 from app.db import ChatVisibility
 from app.services.connector_service import ConnectorService
 from app.services.user_tool_allowlist import (
@ -40,7 +47,10 @@ from ..tools import (
    MAIN_AGENT_SURFSENSE_TOOL_NAMES,
    MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
 )
+from ..tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
+from ..tools.registry import build_main_agent_tools
 from .agent_cache import build_agent_with_cache
+from .connector_searchable_types import map_connectors_to_searchable_types

 _perf_log = get_perf_logger()

@ -90,7 +100,7 @@ async def create_multi_agent_chat_deep_agent(
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
-        available_connectors = _map_connectors_to_searchable_types(connector_types)
+        available_connectors = map_connectors_to_searchable_types(connector_types)

        available_document_types = await connector_service.get_available_document_types(
            search_space_id
@ -199,9 +209,6 @@ async def create_multi_agent_chat_deep_agent(

    modified_disabled_tools = list(disabled_tools) if disabled_tools else []

-    if "search_knowledge_base" not in modified_disabled_tools:
-        modified_disabled_tools.append("search_knowledge_base")
-
    if enabled_tools is not None:
        main_agent_enabled_tools = [
            n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
@ -210,12 +217,14 @@ async def create_multi_agent_chat_deep_agent(
        main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)

    _t0 = time.perf_counter()
-    tools = await build_tools_async(
+    # Main agent builds only its own small SurfSense toolset via the SRP
+    # main-agent registry; connectors/MCP/deliverables are delegated to
+    # subagents, so no MCP loading or connector construction happens here.
+    tools = build_main_agent_tools(
        dependencies=dependencies,
        enabled_tools=main_agent_enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
-        include_mcp_tools=False,
    )

    _flags: AgentFeatureFlags = get_flags()
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/init.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/init.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/backends.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/backends.py
@ -16,7 +16,7 @@ prompt at agent build time, not edited at runtime.
 Two backends are provided:

 * :class:`BuiltinSkillsBackend` — disk-backed read of bundled skills from
-  ``app/agents/new_chat/skills/builtin/``.
+  ``app/agents/chat/multi_agent_chat/main_agent/skills/builtin/``.
 * :class:`SearchSpaceSkillsBackend` — a thin read-only wrapper over
  :class:`KBPostgresBackend` that filters notes under the privileged folder
  ``/documents/_skills/``.
@ -47,7 +47,9 @@ from deepagents.backends.state import StateBackend
 if TYPE_CHECKING:
    from langchain.tools import ToolRuntime

-    from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
+    from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
+        KBPostgresBackend,
+    )

 logger = logging.getLogger(__name__)

@ -59,9 +61,10 @@ _MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
 def _default_builtin_root() -> Path:
    """Return the absolute path to the bundled builtin skills directory.

-    Located at ``app/agents/new_chat/skills/builtin/`` relative to this module.
+    Located at ``builtin/`` next to this module (this module lives at
+    ``app/agents/chat/multi_agent_chat/main_agent/skills/backends.py``).
    """
-    return (Path(__file__).resolve().parent.parent / "skills" / "builtin").resolve()
+    return (Path(__file__).resolve().parent / "builtin").resolve()


 class BuiltinSkillsBackend(BackendProtocol):
@ -121,6 +124,8 @@ class BuiltinSkillsBackend(BackendProtocol):
            else ("/" + str(target.relative_to(self.root)).replace("\\", "/"))
        )
        for child in sorted(target.iterdir()):
+            if child.name == "__pycache__" or child.name.startswith("."):
+                continue
            child_virtual = (
                target_virtual.rstrip("/") + "/" + child.name
                if target_virtual != "/"
@ -305,7 +310,7 @@ def build_skills_backend_factory(
        # Imported lazily to avoid a hard dependency at module import time:
        # ``KBPostgresBackend`` pulls in DB models, which are unnecessary for
        # the unit-tested builtin path.
-        from app.agents.new_chat.middleware.kb_postgres_backend import (
+        from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
            KBPostgresBackend,
        )

--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/init.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/init.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/email-drafting/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/email-drafting/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/meeting-prep/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/meeting-prep/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/report-writing/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/report-writing/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/slack-summary/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/slack-summary/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/init.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/init.py
--- a/Show more
+++ b/Show more
 @ -1 +1 @@
-0.0.26
+0.0.27