Merge pull request #1476 from MODSetter/dev

feat(0.0.27): bug fixes and optimizations
2026-06-24 21:38:09 +02:00 · 2026-06-09 23:10:44 -07:00 · 2026-06-09 23:10:44 -07:00 · 4c29938528
commit 4c29938528
parent 61adc80615 2624392c4a
1169 changed files with 30332 additions and 38144 deletions
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@ -5,6 +5,9 @@ on:
    branches:
      - main
      - dev
    tags:
      - 'v*'
      - 'beta-v*'
    paths:
      - 'surfsense_backend/**'
      - 'surfsense_web/**'
@ -24,11 +27,13 @@ permissions:
  packages: write
 jobs:
-  tag_release:
+  compute_version:
    runs-on: ubuntu-latest
-    if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch'
+    if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/tags/beta-v')
    outputs:
      new_tag: ${{ steps.tag_version.outputs.next_version }}
      commit_sha: ${{ steps.tag_version.outputs.commit_sha }}
      is_release_tag: ${{ steps.tag_version.outputs.is_release_tag }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
@ -37,57 +42,65 @@ jobs:
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.GITHUB_TOKEN }}
      # Compute-only: tag is pushed by finalize_release after everything succeeds.
      - name: Read app version and calculate next Docker build version
        id: tag_version
        run: |
-          APP_VERSION=$(tr -d '[:space:]' < VERSION)
+          if [[ "$GITHUB_REF" == refs/tags/beta-v* ]]; then
-          echo "App version from VERSION file: $APP_VERSION"
+            VERSION="${GITHUB_REF#refs/tags/beta-v}"
            NEXT_VERSION="beta-${VERSION}"
            IS_RELEASE_TAG="true"
-          if [ -z "$APP_VERSION" ]; then
+            if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
-            echo "Error: Could not read version from VERSION file"
+              echo "::error::Version '$VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
-            exit 1
+              exit 1
-          fi
+            fi
-          git fetch --tags
+            echo "Docker beta release version from git tag: $NEXT_VERSION"
          elif [[ "$GITHUB_REF" == refs/tags/v* ]]; then
            NEXT_VERSION="${GITHUB_REF#refs/tags/v}"
            IS_RELEASE_TAG="true"
-          LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
+            if ! echo "$NEXT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$'; then
              echo "::error::Version '$NEXT_VERSION' is not valid semver (expected X.Y.Z). Fix your tag name."
              exit 1
            fi
-          if [ -z "$LATEST_BUILD_TAG" ]; then
+            echo "Docker release version from git tag: $NEXT_VERSION"
            echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
            NEXT_VERSION="${APP_VERSION}.1"
          else
-            echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
+            APP_VERSION=$(tr -d '[:space:]' < VERSION)
-            BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
+            echo "App version from VERSION file: $APP_VERSION"
-            NEXT_BUILD=$((BUILD_NUMBER + 1))
+
-            NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
+            if [ -z "$APP_VERSION" ]; then
              echo "Error: Could not read version from VERSION file"
              exit 1
            fi
            git fetch --tags
            LATEST_BUILD_TAG=$(git tag --list "${APP_VERSION}.*" --sort='-v:refname' | head -n 1)
            if [ -z "$LATEST_BUILD_TAG" ]; then
              echo "No previous Docker build tag found for version ${APP_VERSION}. Starting with ${APP_VERSION}.1"
              NEXT_VERSION="${APP_VERSION}.1"
            else
              echo "Latest Docker build tag found: $LATEST_BUILD_TAG"
              BUILD_NUMBER=$(echo "$LATEST_BUILD_TAG" | rev | cut -d. -f1 | rev)
              NEXT_BUILD=$((BUILD_NUMBER + 1))
              NEXT_VERSION="${APP_VERSION}.${NEXT_BUILD}"
            fi
            IS_RELEASE_TAG="false"
            echo "Calculated next Docker version: $NEXT_VERSION"
          fi
          echo "Calculated next Docker version: $NEXT_VERSION"
          echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT
-
+          echo "commit_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
-      - name: Create and Push Tag
+          echo "is_release_tag=$IS_RELEASE_TAG" >> $GITHUB_OUTPUT
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          NEXT_TAG="${{ steps.tag_version.outputs.next_version }}"
          COMMIT_SHA=$(git rev-parse HEAD)
          echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
          git tag -a "$NEXT_TAG" -m "Docker build $NEXT_TAG"
          echo "Pushing tag $NEXT_TAG to origin"
          git push origin "$NEXT_TAG"
      - name: Verify Tag Push
        run: |
          echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..."
          sleep 5
          git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1)
          echo "Tag successfully pushed."
  build:
-    needs: tag_release
+    needs: compute_version
-    if: always() && (needs.tag_release.result == 'success' || needs.tag_release.result == 'skipped')
+    if: always() && (needs.compute_version.result == 'success' || needs.compute_version.result == 'skipped')
    runs-on: ${{ matrix.os }}
    permissions:
      packages: write
@ -97,6 +110,12 @@ jobs:
      matrix:
        platform: [linux/amd64, linux/arm64]
        image: [backend, web]
        variant: [cpu, cuda, cuda126]
        exclude:
          - image: web
            variant: cuda
          - image: web
            variant: cuda126
        include:
          - platform: linux/amd64
            suffix: amd64
@ -114,6 +133,18 @@ jobs:
            context: ./surfsense_web
            file: ./surfsense_web/Dockerfile
            target: runner
          - variant: cpu
            tag_suffix: ""
            use_cuda: "false"
            cuda_extra: cpu
          - variant: cuda
            tag_suffix: "-cuda"
            use_cuda: "true"
            cuda_extra: cu128
          - variant: cuda126
            tag_suffix: "-cuda126"
            use_cuda: "true"
            cuda_extra: cu126
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -149,7 +180,7 @@ jobs:
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          docker system prune -af
-      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
+      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.variant }}, ${{ matrix.suffix }})
        id: build
        uses: docker/build-push-action@v7
        with:
@ -160,10 +191,14 @@ jobs:
          tags: ${{ steps.image.outputs.name }}
          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
          platforms: ${{ matrix.platform }}
-          cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.suffix }}
+          cache-from: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }}
-          cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.suffix }}
+          cache-to: type=registry,ref=${{ steps.image.outputs.name }}:buildcache-${{ matrix.variant }}-${{ matrix.suffix }},mode=max,image-manifest=true,oci-mediatypes=true
          secrets: |
            HF_TOKEN=${{ secrets.HF_TOKEN }}
          provenance: false
          build-args: |
            ${{ matrix.image == 'backend' && format('USE_CUDA={0}', matrix.use_cuda) || '' }}
            ${{ matrix.image == 'backend' && format('CUDA_EXTRA={0}', matrix.cuda_extra) || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__' || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__' || '' }}
            ${{ matrix.image == 'web' && 'NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__' || '' }}
@ -179,15 +214,47 @@ jobs:
      - name: Upload digest
        uses: actions/upload-artifact@v7
        with:
-          name: digests-${{ matrix.image }}-${{ matrix.suffix }}
+          name: digests-${{ matrix.image }}-${{ matrix.variant }}-${{ matrix.suffix }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1
  # Release gate: require both arches for every variant, else block publishing.
  # Release-only; skipped on dev so the tolerant create_manifest path is kept.
  verify_digests:
    runs-on: ubuntu-latest
    needs: [compute_version, build]
    # always() keeps this job from being auto-skipped when a needed job did not
    # succeed; it then runs only if compute_version succeeded and produced a tag.
    if: ${{ always() && needs.compute_version.result == 'success' && needs.compute_version.outputs.new_tag != '' }}
    steps:
      # merge-multiple is off so each artifact lands in its own
      # digests-<image>-<variant>-<suffix>/ directory; the -path filter in the
      # next step depends on that layout.
      - name: Download all digests
        uses: actions/download-artifact@v8
        with:
          pattern: digests-*
          path: /tmp/digests
          merge-multiple: false
      # check <image>-<variant> counts the digest files found under the matching
      # artifact directories and marks the run as failed when fewer than two are
      # present. All four combinations are checked before exiting so every
      # missing one is reported in a single run.
      - name: Require both arches for every required variant
        run: |
          fail=0
          check() {
            c=$(find /tmp/digests -type f -path "*/digests-$1-*/*" 2>/dev/null | wc -l | tr -d ' ')
            if [ "$c" -lt 2 ]; then
              echo "::error::$1 has $c/2 arch digests — blocking release"
              fail=1
            else
              echo "OK: $1 ($c/2)"
            fi
          }
          check backend-cpu
          check backend-cuda
          check backend-cuda126
          check web-cpu
          [ "$fail" -eq 0 ] || exit 1
  create_manifest:
    runs-on: ubuntu-latest
-    needs: [tag_release, build]
+    needs: [compute_version, build, verify_digests]
-    if: always() && needs.build.result == 'success'
+    if: ${{ !cancelled() && needs.verify_digests.result != 'failure' }}
    permissions:
      packages: write
      contents: read
@ -197,8 +264,20 @@ jobs:
        include:
          - name: surfsense-backend
            image: backend
            variant: cpu
            tag_suffix: ""
          - name: surfsense-backend
            image: backend
            variant: cuda
            tag_suffix: "-cuda"
          - name: surfsense-backend
            image: backend
            variant: cuda126
            tag_suffix: "-cuda126"
          - name: surfsense-web
            image: web
            variant: cpu
            tag_suffix: ""
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
@ -207,22 +286,33 @@ jobs:
        id: image
        run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT
-      - name: Download amd64 digest
+      - name: Download digests
        id: download
        uses: actions/download-artifact@v8
        with:
-          name: digests-${{ matrix.image }}-amd64
+          pattern: digests-${{ matrix.image }}-${{ matrix.variant }}-*
          path: /tmp/digests
          merge-multiple: true
        continue-on-error: true
-      - name: Download arm64 digest
+      - name: Check digests
-        uses: actions/download-artifact@v8
+        id: check
-        with:
+        run: |
-          name: digests-${{ matrix.image }}-arm64
+          count=$(find /tmp/digests -type f 2>/dev/null | wc -l | tr -d ' ')
-          path: /tmp/digests
+          echo "digest_count=$count" >> $GITHUB_OUTPUT
          if [ "$count" -lt 2 ]; then
            echo "::warning::${{ matrix.variant }}: $count/2 digests, skipping merge"
            echo "skip=true" >> $GITHUB_OUTPUT
          else
            echo "skip=false" >> $GITHUB_OUTPUT
          fi
      - name: Set up Docker Buildx
        if: steps.check.outputs.skip != 'true'
        uses: docker/setup-buildx-action@v4
      - name: Login to GitHub Container Registry
        if: steps.check.outputs.skip != 'true'
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
@ -230,9 +320,10 @@ jobs:
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Compute app version
        if: steps.check.outputs.skip != 'true'
        id: appver
        run: |
-          VERSION_TAG="${{ needs.tag_release.outputs.new_tag }}"
+          VERSION_TAG="${{ needs.compute_version.outputs.new_tag }}"
          if [ -n "$VERSION_TAG" ]; then
            APP_VERSION=$(echo "$VERSION_TAG" | rev | cut -d. -f2- | rev)
          else
@ -241,29 +332,69 @@ jobs:
          echo "app_version=$APP_VERSION" >> $GITHUB_OUTPUT
      - name: Docker meta
        if: steps.check.outputs.skip != 'true'
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}
          tags: |
-            type=raw,value=${{ needs.tag_release.outputs.new_tag }},enable=${{ needs.tag_release.outputs.new_tag != '' }}
+            type=raw,value=${{ needs.compute_version.outputs.new_tag }},enable=${{ needs.compute_version.outputs.new_tag != '' }}
-            type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.tag_release.outputs.new_tag != '' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
+            type=raw,value=${{ steps.appver.outputs.app_version }},enable=${{ needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' && (github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch) }}
            type=ref,event=branch
            type=sha,prefix=git-
          flavor: |
-            latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
+            latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch || startsWith(github.ref, 'refs/tags/v') }}
            ${{ matrix.tag_suffix != '' && format('suffix={0},onlatest=true', matrix.tag_suffix) || '' }}
      - name: Create manifest list and push
        if: steps.check.outputs.skip != 'true'
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create \
            $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ steps.image.outputs.name }}@sha256:%s ' *)
      - name: Inspect image
        if: steps.check.outputs.skip != 'true'
        run: |
          docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
      - name: Summary
        if: steps.check.outputs.skip != 'true'
        run: | 
          echo "Multi-arch manifest created for ${{ matrix.name }}!"
          echo "Tags: $(jq -cr '.tags | join(", ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")"
  # Push the git tag only after build, gate, and manifest publish all succeed.
  # Skipped when the run was triggered by a pushed release tag
  # (is_release_tag == 'true').
  finalize_release:
    runs-on: ubuntu-latest
    needs: [compute_version, create_manifest]
    if: ${{ success() && needs.compute_version.outputs.new_tag != '' && needs.compute_version.outputs.is_release_tag != 'true' }}
    permissions:
      contents: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          # Full history, so the commit recorded by compute_version can be tagged.
          fetch-depth: 0
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Create and push git tag
        # Job outputs are passed through the environment instead of being
        # interpolated into the script text, so their content is never parsed
        # as shell syntax.
        env:
          NEXT_TAG: ${{ needs.compute_version.outputs.new_tag }}
          COMMIT_SHA: ${{ needs.compute_version.outputs.commit_sha }}
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
          git tag -a "$NEXT_TAG" "$COMMIT_SHA" -m "Docker build $NEXT_TAG"
          echo "Pushing tag $NEXT_TAG to origin"
          git push origin "$NEXT_TAG"
      - name: Verify tag push
        env:
          NEXT_TAG: ${{ needs.compute_version.outputs.new_tag }}
        run: |
          echo "Checking if tag $NEXT_TAG exists remotely..."
          sleep 5
          # Look the ref up by its full name: --exit-code makes ls-remote fail
          # when nothing matches, and a full ref name cannot be satisfied by a
          # longer tag the way an unanchored grep can (0.0.27.1 vs 0.0.27.10).
          git ls-remote --exit-code --tags origin "refs/tags/$NEXT_TAG" || (echo "Tag push verification failed!" && exit 1)
          echo "Tag successfully pushed."
--- a/2
+++ b/2
@ -1 +1 @@
-0.0.26
+0.0.27
--- a/docker/.env.example
+++ b/docker/.env.example
@ -7,6 +7,16 @@
 # SurfSense version (use "latest" or a specific version like "0.0.14")
 SURFSENSE_VERSION=latest
 # Image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
 # GPU acceleration also requires the NVIDIA Container Toolkit on the host and
 # the GPU overlay in COMPOSE_FILE. Linux/macOS use ":"; Windows uses ";".
 # Example Linux/macOS: COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
 # Example Windows:     COMPOSE_FILE=docker-compose.yml;docker-compose.gpu.yml
 # Use "cuda126" for older NVIDIA driver stacks; use "cuda" for newer drivers.
 SURFSENSE_VARIANT=
 # COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
 # SURFSENSE_GPU_COUNT=1
 # Deployment environment: dev or production
 SURFSENSE_ENV=production
@ -55,6 +65,9 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # -- Redis exposed port (dev only; Redis is internal-only in prod) --
 # REDIS_PORT=6379
 # -- WhatsApp bridge exposed port (dev/hybrid only; prod keeps it Docker-internal) --
 # WHATSAPP_BRIDGE_PORT=9929
 # -- Frontend Build Args --
 # In dev, the frontend is built from source and these are passed as build args.
 # In prod, they are automatically derived from AUTH_TYPE, ETL_SERVICE, and the port settings above.
@ -67,7 +80,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # ------------------------------------------------------------------------------
 # ONLY set these if you are serving SurfSense on a real domain via a reverse
 # proxy (e.g. Caddy, Nginx, Cloudflare Tunnel).
-# For standard localhost deployments, leave all of these commented out —
+# For standard localhost deployments, leave all of these commented out.
 # they are automatically derived from the port settings above.
 #
 # NEXT_FRONTEND_URL=https://app.yourdomain.com
@ -89,7 +102,11 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # Only change this if you manage publications manually.
 # ZERO_APP_PUBLICATIONS=zero_publication
-# Sync worker tuning — zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
+# Keep Zero's documented halt safety net enabled. If replication halts, Zero
 # can wipe and re-sync its local SQLite replica without touching Postgres.
 # ZERO_AUTO_RESET=true
 # Sync worker tuning. zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
 # of CPU cores, which can exceed the connection pool limits on high-core machines.
 # Each sync worker needs at least 1 connection from both the UPSTREAM and CVR
 # pools, so these constraints must hold:
@ -134,7 +151,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # SSL mode for database connections: disable, require, verify-ca, verify-full
 # DB_SSLMODE=disable
-# Full DATABASE_URL override — when set, takes precedence over the individual
+# Full DATABASE_URL override. When set, this takes precedence over the individual
 # DB_USER / DB_PASSWORD / DB_NAME / DB_HOST / DB_PORT settings above.
 # Use this for managed databases (AWS RDS, GCP Cloud SQL, Supabase, etc.)
 # DATABASE_URL=postgresql+asyncpg://user:password@your-rds-host:5432/surfsense?sslmode=require
@ -149,7 +166,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # REDIS_URL=redis://redis:6379/0
 # ------------------------------------------------------------------------------
-# Stripe (pay-as-you-go page packs — disabled by default)
+# Stripe (pay-as-you-go page packs, disabled by default)
 # ------------------------------------------------------------------------------
 # Set TRUE to allow users to buy additional page packs via Stripe Checkout
@ -168,7 +185,6 @@ STRIPE_PAGE_BUYING_ENABLED=FALSE
 # STRIPE_TOKEN_BUYING_ENABLED=FALSE
 # STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
 # STRIPE_CREDIT_MICROS_PER_UNIT=1000000
 # DEPRECATED — STRIPE_TOKENS_PER_UNIT=1000000
 # ------------------------------------------------------------------------------
 # TTS & STT (Text-to-Speech / Speech-to-Text)
@ -263,7 +279,44 @@ STT_SERVICE=local/base
 # COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
 # ------------------------------------------------------------------------------
-# SearXNG (bundled web search — works out of the box, no config needed)
+# Messaging Channels (optional)
 # ------------------------------------------------------------------------------
 # Configure only the external chat channels you want to use.
 # -- Telegram --
 # TELEGRAM_SHARED_BOT_TOKEN=
 # TELEGRAM_SHARED_BOT_USERNAME=
 # TELEGRAM_WEBHOOK_SECRET=
 # GATEWAY_BASE_URL=http://localhost:8929
 # GATEWAY_TELEGRAM_INTAKE_MODE=webhook
 # -- WhatsApp --
 # GATEWAY_WHATSAPP_INTAKE_MODE=disabled
 # WHATSAPP_SHARED_BUSINESS_TOKEN=
 # WHATSAPP_SHARED_PHONE_NUMBER_ID=
 # WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
 # WHATSAPP_SHARED_WABA_ID=
 # WHATSAPP_GRAPH_API_VERSION=v25.0
 # WHATSAPP_WEBHOOK_VERIFY_TOKEN=
 # WHATSAPP_WEBHOOK_APP_SECRET=
 # WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
 # -- Slack --
 # Uses SLACK_CLIENT_ID and SLACK_CLIENT_SECRET from the Slack connector section.
 #
 # GATEWAY_SLACK_ENABLED=FALSE
 # GATEWAY_SLACK_SIGNING_SECRET=
 # GATEWAY_SLACK_REDIRECT_URI=http://localhost:8929/api/v1/gateway/slack/callback
 # -- Discord --
 # Uses DISCORD_CLIENT_ID, DISCORD_CLIENT_SECRET, and DISCORD_BOT_TOKEN from the
 # Discord connector section.
 #
 # GATEWAY_DISCORD_ENABLED=FALSE
 # GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8929/api/v1/gateway/discord/callback
 # ------------------------------------------------------------------------------
 # SearXNG (bundled web search, works out of the box with no config needed)
 # ------------------------------------------------------------------------------
 # SearXNG provides web search to all search spaces automatically.
 # To access the SearXNG UI directly: http://localhost:8888
@ -273,7 +326,7 @@ STT_SERVICE=local/base
 # SEARXNG_SECRET=surfsense-searxng-secret
 # ------------------------------------------------------------------------------
-# Daytona Sandbox (optional — cloud code execution for the deep agent)
+# Daytona Sandbox (optional cloud code execution for the deep agent)
 # ------------------------------------------------------------------------------
 # Set DAYTONA_SANDBOX_ENABLED=TRUE and provide credentials to give the agent
 # an isolated code execution environment via the Daytona cloud API.
@ -286,9 +339,6 @@ STT_SERVICE=local/base
 # External API Keys (optional)
 # ------------------------------------------------------------------------------
 # Firecrawl (web scraping)
 # FIRECRAWL_API_KEY=
 # Unstructured (if ETL_SERVICE=UNSTRUCTURED)
 # UNSTRUCTURED_API_KEY=
@ -364,7 +414,6 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
 # Premium turns are debited at the actual per-call provider cost reported
 # by LiteLLM. Only applies to models with billing_tier=premium.
 # PREMIUM_CREDIT_MICROS_LIMIT=5000000
 # DEPRECATED — PREMIUM_TOKEN_LIMIT=5000000
 # Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default).
 # QUOTA_MAX_RESERVE_MICROS=1000000
@ -376,10 +425,10 @@ SURFSENSE_ENABLE_DOOM_LOOP=true
 # QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
 # Per-video-presentation reservation for the video Celery task ($1.00 default).
-# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
+# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp. Raise with care.
 # QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
-# No-login (anonymous) mode — public users can chat without an account
+# No-login (anonymous) mode. Public users can chat without an account
 # Set TRUE to enable /free pages and anonymous chat API
 NOLOGIN_MODE_ENABLED=FALSE
 # ANON_TOKEN_LIMIT=1000000
--- a/docker/docker-compose.deps-only.yml
+++ b/docker/docker-compose.deps-only.yml
@ -114,6 +114,7 @@ services:
      - ZERO_REPLICA_FILE=/data/zero.db
      - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
      - ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
      - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
      - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
      - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -122,11 +123,30 @@ services:
    volumes:
      - zero_cache_data:/data
    restart: unless-stopped
    stop_grace_period: 300s
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 600s
  # OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
  # original-file backend. The default filesystem backend needs none of this.
  # To exercise it, set in surfsense_backend/.env:
  #   FILE_STORAGE_BACKEND=azure
  #   AZURE_STORAGE_CONTAINER=surfsense-documents
  #   AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:${AZURITE_BLOB_PORT:-10000}/devstoreaccount1;
  # The backend creates blobs on upload; create the container once first
  # (Azure CLI / Storage Explorer), then upload a document.
  azurite:
    image: mcr.microsoft.com/azure-storage/azurite:3.33.0
    # This command replaces the image's default one, so the workspace location
    # must be given explicitly. Without --location Azurite stores blobs in its
    # working directory and the azurite_data volume mounted at /data is unused.
    command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --location /data
    ports:
      - "${AZURITE_BLOB_PORT:-10000}:10000"
    volumes:
      - azurite_data:/data
    restart: unless-stopped
 volumes:
  postgres_data:
@ -137,3 +157,5 @@ volumes:
    name: surfsense-deps-redis
  zero_cache_data:
    name: surfsense-deps-zero-cache
  azurite_data:
    name: surfsense-deps-azurite
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@ -46,8 +46,6 @@ services:
      - PYTHONPATH=/app
      - SERVICE_ROLE=migrate
      - MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900}
    volumes:
      - zero_init:/zero-init
    depends_on:
      db:
        condition: service_healthy
@ -126,6 +124,7 @@ services:
      - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
      - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
      - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
      - WHATSAPP_BRIDGE_URL=${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
      # Daytona Sandbox – uncomment and set credentials to enable cloud code execution
      # - DAYTONA_SANDBOX_ENABLED=TRUE
      # - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
@ -148,6 +147,25 @@ services:
      retries: 30
      start_period: 200s
  # WhatsApp bridge, built from the backend's scripts directory. It belongs to
  # the `whatsapp` profile, so Compose starts it only when that profile is
  # enabled.
  whatsapp-bridge:
    build: ../surfsense_backend/scripts/whatsapp-bridge
    profiles:
      - whatsapp
    # Published on the host's loopback interface only; the host port is
    # configurable, the container port is fixed at 9929 (see PORT below).
    ports:
      - "127.0.0.1:${WHATSAPP_BRIDGE_PORT:-9929}:9929"
    # Session data is kept on a named volume; WHATSAPP_SESSION_DIR points the
    # bridge at the same path.
    volumes:
      - whatsapp_sessions:/data/sessions
    environment:
      - PORT=9929
      - WHATSAPP_MODE=${WHATSAPP_MODE:-self-chat}
      - WHATSAPP_SESSION_DIR=/data/sessions
    restart: unless-stopped
    # Probes the bridge's /health endpoint from inside the container.
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
      interval: 30s
      timeout: 5s
      retries: 5
  celery_worker:
    build: *backend-build
    volumes:
@ -197,21 +215,6 @@ services:
      celery_worker:
        condition: service_started
  # flower:
  #   build: *backend-build
  #   ports:
  #     - "${FLOWER_PORT:-5555}:5555"
  #   env_file:
  #     - ../surfsense_backend/.env
  #   environment:
  #     - CELERY_BROKER_URL=${REDIS_URL:-redis://redis:6379/0}
  #     - CELERY_RESULT_BACKEND=${REDIS_URL:-redis://redis:6379/0}
  #     - PYTHONPATH=/app
  #   command: celery -A app.celery_app flower --port=5555
  #   depends_on:
  #     - redis
  #     - celery_worker
  zero-cache:
    image: rocicorp/zero:1.4.0
    ports:
@ -230,6 +233,7 @@ services:
      - ZERO_REPLICA_FILE=/data/zero.db
      - ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      - ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
      - ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
      - ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
      - ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
      - ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
@ -237,18 +241,14 @@ services:
      - ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
    volumes:
      - zero_cache_data:/data
      - zero_init:/zero-init
    # Wrapper: see docker/docker-compose.yml `zero-cache` for rationale.
    entrypoint: ["sh", "-c"]
    # Pass the script as a single list element so Compose does not tokenize it.
    command:
      - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
    restart: unless-stopped
    stop_grace_period: 300s
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 600s
  frontend:
    build:
@ -280,5 +280,5 @@ volumes:
    name: surfsense-dev-shared-temp
  zero_cache_data:
    name: surfsense-dev-zero-cache
-  zero_init:
+  whatsapp_sessions:
-    name: surfsense-dev-zero-init
+    name: surfsense-dev-whatsapp-sessions
--- a/docker/docker-compose.gpu.yml
+++ b/docker/docker-compose.gpu.yml
@ -0,0 +1,30 @@
 # GPU overlay: adds a device reservation with the `gpu` capability to the
 # backend, celery_worker and celery_beat services. It defines no images or
 # commands of its own. The driver defaults to nvidia and the device count to 1;
 # override them with SURFSENSE_GPU_DRIVER and SURFSENSE_GPU_COUNT.
 services:
  backend:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities:
                - gpu
  # Same reservation as backend.
  celery_worker:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities:
                - gpu
  # Same reservation as backend.
  celery_beat:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities:
                - gpu
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@ -29,12 +29,11 @@ services:
  # Short-lived schema runner. Executes `alembic upgrade head` and verifies
  # that the `zero_publication` Postgres logical-replication publication
-  # exists, then exits 0. Downstream services (backend, celery_*, zero-cache)
+  # matches the canonical shape, then exits 0. Downstream services gate on this
-  # gate on this with `condition: service_completed_successfully` so a failed
+  # with `condition: service_completed_successfully` so a failed migration halts
-  # migration halts the whole stack instead of silently producing a half-built
+  # the whole stack instead of booting zero-cache against a drifted publication.
  # system that crash-loops zero-cache on missing publications.
  migrations:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    env_file:
      - .env
    environment:
@ -42,8 +41,6 @@ services:
      PYTHONPATH: /app
      SERVICE_ROLE: migrate
      MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900}
    volumes:
      - zero_init:/zero-init
    depends_on:
      db:
        condition: service_healthy
@ -61,28 +58,28 @@ services:
      timeout: 5s
      retries: 5
-  otel-collector:
+  # otel-collector:
-    image: otel/opentelemetry-collector-contrib:0.152.1
+  #   image: otel/opentelemetry-collector-contrib:0.152.1
-    profiles:
+  #   profiles:
-      - observability
+  #     - observability
-    command: ["--config=/etc/otelcol/config.yaml"]
+  #   command: ["--config=/etc/otelcol/config.yaml"]
-    volumes:
+  #   volumes:
-      - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
+  #     - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
-    environment:
+  #   environment:
-      GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
+  #     GRAFANA_CLOUD_OTLP_ENDPOINT: ${GRAFANA_CLOUD_OTLP_ENDPOINT:-}
-      GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
+  #     GRAFANA_CLOUD_INSTANCE_ID: ${GRAFANA_CLOUD_INSTANCE_ID:-}
-      GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
+  #     GRAFANA_CLOUD_API_KEY: ${GRAFANA_CLOUD_API_KEY:-}
-    ports:
+  #   ports:
-      - "${OTEL_GRPC_PORT:-4317}:4317"
+  #     - "${OTEL_GRPC_PORT:-4317}:4317"
-      - "${OTEL_HTTP_PORT:-4318}:4318"
+  #     - "${OTEL_HTTP_PORT:-4318}:4318"
-      - "${OTEL_HEALTH_PORT:-13133}:13133"
+  #     - "${OTEL_HEALTH_PORT:-13133}:13133"
-    mem_limit: 2g
+  #   mem_limit: 2g
-    restart: unless-stopped
+  #   restart: unless-stopped
-    healthcheck:
+  #   healthcheck:
-      test: ["CMD", "/otelcol-contrib", "--version"]
+  #     test: ["CMD", "/otelcol-contrib", "--version"]
-      interval: 30s
+  #     interval: 30s
-      timeout: 5s
+  #     timeout: 5s
-      retries: 3
+  #     retries: 3
  searxng:
    image: searxng/searxng:2026.3.13-3c1f68c59
@ -98,7 +95,7 @@ services:
      retries: 5
  backend:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    ports:
      - "${BACKEND_PORT:-8929}:8000"
    volumes:
@ -118,6 +115,7 @@ services:
      UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
      NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
      SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
      WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
      # Daytona Sandbox – uncomment and set credentials to enable cloud code execution
      # DAYTONA_SANDBOX_ENABLED: "TRUE"
      # DAYTONA_API_KEY: ${DAYTONA_API_KEY:-}
@ -143,8 +141,28 @@ services:
      retries: 30
      start_period: 200s
  # whatsapp-bridge:
  #   build: ../surfsense_backend/scripts/whatsapp-bridge
  #   profiles:
  #     - whatsapp
  #   expose:
  #     - "9929"
  #   volumes:
  #     - whatsapp_sessions:/data/sessions
  #   environment:
  #     PORT: 9929
  #     WHATSAPP_MODE: ${WHATSAPP_MODE:-self-chat}
  #     WHATSAPP_SESSION_DIR: /data/sessions
  #   mem_limit: 512m
  #   restart: unless-stopped
  #   healthcheck:
  #     test: ["CMD", "wget", "-qO-", "http://localhost:9929/health"]
  #     interval: 30s
  #     timeout: 5s
  #     retries: 5
  celery_worker:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    volumes:
      - shared_temp:/shared_tmp
    env_file:
@ -174,7 +192,7 @@ services:
    restart: unless-stopped
  celery_beat:
-    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
+    image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
    env_file:
      - .env
    environment:
@ -197,22 +215,6 @@ services:
      - "com.centurylinklabs.watchtower.enable=true"
    restart: unless-stopped
  # flower:
  #   image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
  #   ports:
  #     - "${FLOWER_PORT:-5555}:5555"
  #   env_file:
  #     - .env
  #   environment:
  #     CELERY_BROKER_URL: ${REDIS_URL:-redis://redis:6379/0}
  #     CELERY_RESULT_BACKEND: ${REDIS_URL:-redis://redis:6379/0}
  #     PYTHONPATH: /app
  #   command: celery -A app.celery_app flower --port=5555
  #   depends_on:
  #     - redis
  #     - celery_worker
  #   restart: unless-stopped
  zero-cache:
    image: rocicorp/zero:1.4.0
    ports:
@ -226,6 +228,7 @@ services:
      ZERO_REPLICA_FILE: /data/zero.db
      ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
      ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication}
      ZERO_AUTO_RESET: ${ZERO_AUTO_RESET:-true}
      ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4}
      ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20}
      ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30}
@ -233,16 +236,8 @@ services:
      ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
    volumes:
      - zero_cache_data:/data
      - zero_init:/zero-init
    # Wrapper: if the migrations service flagged a publication change via
    # /zero-init/needs_reset, wipe the SQLite replica before starting so
    # zero-cache does a clean initial sync. Recovers from the half-built
    # replica state (`_zero.tableMetadata` missing) caused by earlier crashes.
    entrypoint: ["sh", "-c"]
    # Pass the script as a single list element so Compose does not tokenize it.
    command:
      - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
    restart: unless-stopped
    stop_grace_period: 300s
    depends_on:
      db:
        condition: service_healthy
@ -253,6 +248,7 @@ services:
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 600s
  frontend:
    image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest}
@ -264,6 +260,7 @@ services:
      NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL}
      NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING}
      NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted}
      NEXT_PUBLIC_WHATSAPP_DISPLAY_PHONE_NUMBER: ${WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER:-}
      FASTAPI_BACKEND_INTERNAL_URL: ${FASTAPI_BACKEND_INTERNAL_URL:-http://backend:8000}
    labels:
      - "com.centurylinklabs.watchtower.enable=true"
@ -283,5 +280,5 @@ volumes:
    name: surfsense-shared-temp
  zero_cache_data:
    name: surfsense-zero-cache
-  zero_init:
+  whatsapp_sessions:
-    name: surfsense-zero-init
+    name: surfsense-whatsapp-sessions
--- a/docker/scripts/install.ps1
+++ b/docker/scripts/install.ps1
@ -7,6 +7,8 @@
 # To pass flags, save and run locally:
 #   .\install.ps1 -NoWatchtower
 #   .\install.ps1 -WatchtowerInterval 3600
 #   .\install.ps1 -Variant cuda
 #   .\install.ps1 -Variant cuda -GpuCount all
 #
 # Handles two cases automatically:
 #   1. Fresh install        — no prior SurfSense data detected
@ -17,7 +19,11 @@
 param(
    [switch]$NoWatchtower,
-    [int]$WatchtowerInterval = 86400
+    [int]$WatchtowerInterval = 86400,
    [ValidateSet("cpu", "cuda", "cuda126")]
    [string]$Variant,
    [string]$GpuCount,
    [switch]$Quiet
 )
 $ErrorActionPreference = 'Stop'
@ -34,6 +40,11 @@ $MigrationMode      = $false
 $SetupWatchtower    = -not $NoWatchtower
 $WatchtowerContainer = "watchtower"
 if ($GpuCount -and $GpuCount -notmatch '^([0-9]+|all)$') {
    Write-Host "[SurfSense] ERROR: Invalid -GpuCount '$GpuCount'. Use a number or 'all'." -ForegroundColor Red
    exit 1
 }
 # ── Output helpers ──────────────────────────────────────────────────────────
 function Write-Info    { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg }
@ -42,6 +53,27 @@ function Write-Warn    { param([string]$Msg) Write-Host "[SurfSense] " -Foregrou
 function Write-Step    { param([string]$Msg) Write-Host "`n-- $Msg" -ForegroundColor Cyan }
 function Write-Err     { param([string]$Msg) Write-Host "[SurfSense] ERROR: $Msg" -ForegroundColor Red; exit 1 }
 function Show-Banner {
    Write-Host ""
    Write-Host @"
 ███████╗██╗   ██╗██████╗ ███████╗███████╗███████╗███╗   ██╗███████╗███████╗
 ██╔════╝██║   ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗  ██║██╔════╝██╔════╝
 ███████╗██║   ██║██████╔╝█████╗  ███████╗█████╗  ██╔██╗ ██║███████╗█████╗  
 ╚════██║██║   ██║██╔══██╗██╔══╝  ╚════██║██╔══╝  ██║╚██╗██║╚════██║██╔══╝  
 ███████║╚██████╔╝██║  ██║██║     ███████║███████╗██║ ╚████║███████║███████╗
 ╚══════╝ ╚═════╝ ╚═╝  ╚═╝╚═╝     ╚══════╝╚══════╝╚═╝  ╚═══╝╚══════╝╚══════╝
 "@ -ForegroundColor White
    Write-Host "         OSS Alternative to NotebookLM for Teams" -ForegroundColor Yellow
    Write-Host ("=" * 62) -ForegroundColor Cyan
    Write-Info "This installer will create $InstallDir\ and start SurfSense with Docker Compose."
 }
 Show-Banner
 function Invoke-NativeSafe {
    param([scriptblock]$Command)
    $previousErrorActionPreference = $ErrorActionPreference
@ -53,6 +85,28 @@ function Invoke-NativeSafe {
    }
 }
 # Asks the user whether Watchtower auto-updates should be enabled and stores
 # the answer in $script:SetupWatchtower.
 # The prompt is skipped (leaving the current value untouched) when
 # -NoWatchtower or -Quiet was passed, or when the session is not interactive.
 # An empty answer or y/yes enables Watchtower, n/no disables it, and anything
 # else enables it with a warning.
 function Resolve-WatchtowerPreference {
    if ($NoWatchtower -or $Quiet -or -not [Environment]::UserInteractive) {
        return
    }
    Write-Host ""
    Write-Host "Automatic updates" -ForegroundColor Cyan
    $choice = Read-Host "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]"
    # Trim surrounding whitespace so an answer such as "n " is honoured instead
    # of falling through to the enable-by-default branch. The string
    # interpolation also turns a null answer into an empty string.
    $choice = "$choice".Trim()
    switch ($choice) {
        "" { $script:SetupWatchtower = $true }
        { $_ -match '^(?i)y(es)?$' } { $script:SetupWatchtower = $true }
        { $_ -match '^(?i)n(o)?$' } { $script:SetupWatchtower = $false }
        default {
            Write-Warn "Unrecognized choice '$choice'; enabling Watchtower by default. Use -NoWatchtower to skip it."
            $script:SetupWatchtower = $true
        }
    }
 }
 Resolve-WatchtowerPreference
 # ── Pre-flight checks ──────────────────────────────────────────────────────
 Write-Step "Checking prerequisites"
@ -97,143 +151,11 @@ function Wait-ForPostgres {
    Write-Ok "PostgreSQL is ready."
 }
-# ── Stack health helpers ────────────────────────────────────────────────────
+# ── Stack startup helper ────────────────────────────────────────────────────
 function Get-ComposeServices {
    Push-Location $InstallDir
    try {
        $raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null }
    } finally {
        Pop-Location
    }
    if ([string]::IsNullOrWhiteSpace($raw)) { return @() }
    # Compose v2.21+ emits a JSON array; older versions emit one object per line.
    try {
        $parsed = $raw | ConvertFrom-Json
        if ($parsed -is [System.Collections.IEnumerable] -and -not ($parsed -is [string])) {
            return @($parsed)
        }
        return @($parsed)
    } catch {
        $services = @()
        foreach ($line in ($raw -split "`r?`n")) {
            $line = $line.Trim()
            if (-not $line) { continue }
            try { $services += ($line | ConvertFrom-Json) } catch { }
        }
        return $services
    }
 }
 function Wait-StackHealthy {
    param([int]$TimeoutSec = 300)
    $deadline = (Get-Date).AddSeconds($TimeoutSec)
    $lastReport = ""
    while ((Get-Date) -lt $deadline) {
        $services = Get-ComposeServices
        if (-not $services -or $services.Count -eq 0) {
            Start-Sleep -Seconds 3
            continue
        }
        $bad = @()
        $waiting = @()
        $good = @()
        foreach ($svc in $services) {
            $name = $svc.Service
            $state = $svc.State
            $health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' }
            $exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null }
            if ($name -eq 'migrations') {
                if ($state -eq 'exited' -and $exit -eq 0) { $good += $name }
                elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" }
                else { $waiting += "${name} (${state})" }
                continue
            }
            if ($state -eq 'running') {
                if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') {
                    $good += $name
                } elseif ($health -eq 'starting') {
                    $waiting += "${name} (starting)"
                } elseif ($health -eq 'unhealthy') {
                    $bad += "${name} (unhealthy)"
                } else {
                    $waiting += "${name} (${health})"
                }
            } elseif ($state -eq 'restarting') {
                $bad += "${name} (restarting)"
            } elseif ($state -eq 'exited') {
                $bad += "${name} (exited, code=${exit})"
            } else {
                $waiting += "${name} (${state})"
            }
        }
        if ($bad.Count -gt 0) {
            return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good }
        }
        if ($waiting.Count -eq 0) {
            return @{ Ok = $true; Reason = 'all_healthy'; Good = $good }
        }
        $report = "Waiting on: " + ($waiting -join ', ')
        if ($report -ne $lastReport) {
            Write-Info $report
            $lastReport = $report
        }
        Start-Sleep -Seconds 5
    }
    return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good }
 }
 function Test-StaleZeroCacheVolume {
    $raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
    if ([string]::IsNullOrWhiteSpace($raw)) { return $false }
    $names = $raw -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }
    $hasZeroCache = $names -contains 'surfsense-zero-cache'
    $hasZeroInit = $names -contains 'surfsense-zero-init'
    # Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init.
    # Such a volume may hold a half-initialized SQLite replica from an earlier
    # crash-loop. Wiping it forces zero-cache to do a fresh initial sync.
    return ($hasZeroCache -and -not $hasZeroInit)
 }
 function Invoke-StaleZeroCacheCleanup {
    if (-not (Test-StaleZeroCacheVolume)) { return }
    Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
    Write-Warn "predates the migrations-service fix. It may contain a half-initialized"
    Write-Warn "SQLite replica that would block zero-cache from starting."
    Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
    Start-Sleep -Seconds 5
    Push-Location $InstallDir
    Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null
    Pop-Location
    Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null
    Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
 }
 function Write-Err-NoExit {
    param([string]$Message)
    Write-Host "[ERROR] $Message" -ForegroundColor Red
 }
 function Invoke-StackFailureReport {
    param([hashtable]$Result)
    Write-Host ""
-    Write-Err-NoExit "Stack did not reach a healthy state."
+    Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
    if ($Result.Bad.Count -gt 0) { Write-Host ("  Failed: " + ($Result.Bad -join ', ')) }
    if ($Result.Waiting.Count -gt 0) { Write-Host ("  Stuck:  " + ($Result.Waiting -join ', ')) }
    Write-Host ""
    Write-Info "Recent logs from migrations / zero-cache / backend:"
    Push-Location $InstallDir
@ -247,11 +169,151 @@ function Invoke-StackFailureReport {
    Write-Host "Recovery hints:" -ForegroundColor Yellow
    Write-Host "  1. Inspect migrations:   cd $InstallDir; docker compose logs migrations"
    Write-Host "  2. Verify publication:   cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
-    Write-Host "  3. Hard reset zero db:   cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d"
+    Write-Host "  3. Hard reset zero db:   cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d --wait"
    Write-Host ""
    exit 1
 }
 # Starts the stack with `docker compose up -d --wait` from $InstallDir.
 # The working directory is restored in `finally`, so it is popped even if the
 # compose call throws. A non-zero exit code from compose hands control to
 # Invoke-StackFailureReport.
 function Invoke-ComposeUpWait {
    Push-Location $InstallDir
    try {
        Invoke-NativeSafe { docker compose up -d --wait }
    } finally {
        Pop-Location
    }
    # $LASTEXITCODE still holds the result of the docker call at this point.
    if ($LASTEXITCODE -ne 0) {
        # NOTE(review): called without -Result, so the report's Failed/Stuck
        # lists will be empty - confirm that is intended.
        Invoke-StackFailureReport
    }
 }
 # ── Variant and .env helpers ────────────────────────────────────────────────
 # Sets KEY=VALUE in a dotenv-style file.
 #   -Path   file to edit; a missing file is treated as empty
 #   -Key    variable name, matched literally at the start of a line
 #   -Value  text written verbatim after "="
 # Every existing "KEY=" line is rewritten in place. When no such line exists
 # the assignment is appended as a new last line.
 function Set-EnvValue {
    param([string]$Path, [string]$Key, [string]$Value)
    $lines = @()
    if (Test-Path $Path) {
        $lines = @(Get-Content $Path)
    }
    $updated = $false
    # Wrap the loop in @(...) so the result is always an array. Without it a
    # one-line file yields a bare string and the "+=" below would concatenate
    # onto that line instead of appending a new one.
    $newLines = @(foreach ($line in $lines) {
        if ($line -match "^$([regex]::Escape($Key))=") {
            $updated = $true
            "$Key=$Value"
        } else {
            $line
        }
    })
    if (-not $updated) {
        $newLines += "$Key=$Value"
    }
    Set-Content -Path $Path -Value $newLines
 }
 # Deletes every "KEY=" line from a dotenv-style file.
 #   -Path  file to edit; nothing happens when it does not exist
 #   -Key   variable name, matched literally at the start of a line
 function Remove-EnvValue {
    param([string]$Path, [string]$Key)
    if (Test-Path $Path) {
        $assignmentPattern = "^$([regex]::Escape($Key))="
        $remaining = Get-Content $Path | Where-Object { $_ -notmatch $assignmentPattern }
        Set-Content -Path $Path -Value $remaining
    }
 }
 # Returns $true when nvidia-smi is on PATH and exits with code 0 when run,
 # otherwise $false.
 function Test-NvidiaGpu {
    $smiCommand = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if ($null -eq $smiCommand) {
        return $false
    }
    # Only the exit code matters here; every output stream is discarded.
    Invoke-NativeSafe { nvidia-smi *>$null } | Out-Null
    if ($LASTEXITCODE -eq 0) {
        return $true
    }
    return $false
 }
 # Returns $true when the NVIDIA container runtime appears to be available:
 # either `docker info` mentions "nvidia", or one of the toolkit executables
 # (nvidia-ctk, nvidia-container-runtime) is on PATH.
 function Test-NvidiaRuntime {
    $dockerInfo = Invoke-NativeSafe { docker info 2>$null }
    if ($dockerInfo -match 'nvidia') {
        return $true
    }
    foreach ($toolName in 'nvidia-ctk', 'nvidia-container-runtime') {
        if (Get-Command $toolName -ErrorAction SilentlyContinue) {
            return $true
        }
    }
    return $false
 }
 # Picks the GPU image variant from the major driver version reported by
 # nvidia-smi for the first GPU: "cuda126" for a known major version below
 # 570, "cuda" otherwise (including when the version cannot be parsed).
 function Get-RecommendedVariant {
    $driverVersion = Invoke-NativeSafe { nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>$null } | Select-Object -First 1
    $majorVersion = if ($driverVersion -match '^(\d+)') { [int]$Matches[1] } else { 0 }
    if ($majorVersion -le 0 -or $majorVersion -ge 570) {
        return "cuda"
    }
    return "cuda126"
 }
 # Decides which backend image variant to install and returns its name
 # ("cpu", "cuda" or "cuda126").
 #   * An explicit -Variant is honoured when it is "cpu", or when both an
 #     NVIDIA GPU and the container runtime are detected; otherwise it falls
 #     back to "cpu" with a warning.
 #   * Without -Variant, an interactive session with a usable GPU is asked
 #     whether to use the recommended GPU variant. Every other case is "cpu".
 function Resolve-Variant {
    $hasGpu = Test-NvidiaGpu
    $hasRuntime = $false
    $recommended = "cpu"
    if ($hasGpu) {
        # Driver and runtime are only probed once a GPU is known to exist.
        $recommended = Get-RecommendedVariant
        $hasRuntime = Test-NvidiaRuntime
    }
    if ($Variant) {
        if ($Variant -eq "cpu") { return "cpu" }
        if (-not $hasGpu) {
            Write-Warn "No NVIDIA GPU detected; falling back to CPU variant."
            return "cpu"
        }
        if (-not $hasRuntime) {
            Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant."
            Write-Warn "Install the toolkit before enabling SurfSense GPU acceleration."
            return "cpu"
        }
        return $Variant
    }
    if ($hasGpu -and -not $hasRuntime) {
        Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant."
    }
    if ($hasGpu -and $hasRuntime -and -not $Quiet -and [Environment]::UserInteractive) {
        Write-Host ""
        Write-Host "SurfSense detected an NVIDIA GPU." -ForegroundColor Cyan
        $choice = Read-Host "Use GPU acceleration? [Y/n]"
        # Trim surrounding whitespace so an answer such as "y " is accepted
        # instead of silently selecting the CPU variant. The interpolation
        # also turns a null answer into an empty string.
        $choice = "$choice".Trim()
        switch ($choice) {
            "" { return $recommended }
            { $_ -match '^(?i)y(es)?$' } { return $recommended }
            { $_ -match '^(?i)n(o)?$' } { return "cpu" }
            default {
                Write-Warn "Unrecognized choice '$choice'; using CPU variant."
                return "cpu"
            }
        }
    }
    return "cpu"
 }
 # Writes the variant-related settings into the .env file.
 #   -Path                 .env file to edit
 #   -SelectedVariant      "cpu" or a GPU variant name
 #   -AllowExistingUpdate  when $false and the file already exists, the file
 #                         is left untouched and a hint is printed instead
 # The CPU variant clears SURFSENSE_VARIANT and removes COMPOSE_FILE and
 # SURFSENSE_GPU_COUNT. A GPU variant sets SURFSENSE_VARIANT, points
 # COMPOSE_FILE at both compose files and records $GpuCount when one was given.
 # COMPOSE_PROFILES is removed in both cases.
 function Set-VariantEnv {
    param([string]$Path, [string]$SelectedVariant, [bool]$AllowExistingUpdate)
    if ((Test-Path $Path) -and -not $AllowExistingUpdate) {
        Write-Warn ".env already exists - keeping your existing configuration."
        Write-Info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in $Path, then run docker compose up -d --wait."
        return
    }
    if ($SelectedVariant -eq "cpu") {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value ""
        Remove-EnvValue -Path $Path -Key "COMPOSE_FILE"
        Remove-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT"
    } else {
        # Compose splits COMPOSE_FILE on the platform path separator (";" on
        # Windows, ":" elsewhere), so use that instead of a hard-coded ";".
        $composeFiles = "docker-compose.yml", "docker-compose.gpu.yml" -join [System.IO.Path]::PathSeparator
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value $SelectedVariant
        Set-EnvValue -Path $Path -Key "COMPOSE_FILE" -Value $composeFiles
        if ($GpuCount) {
            Set-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT" -Value $GpuCount
        }
    }
    Remove-EnvValue -Path $Path -Key "COMPOSE_PROFILES"
 }
 $SelectedVariant = Resolve-Variant
 # ── Download files ──────────────────────────────────────────────────────────
 Write-Step "Downloading SurfSense files"
@ -262,6 +324,7 @@ New-Item -ItemType Directory -Path "$InstallDir\searxng" -Force | Out-Null
 $Files = @(
    @{ Src = "docker/docker-compose.yml";                Dest = "docker-compose.yml" }
    @{ Src = "docker/docker-compose.gpu.yml";            Dest = "docker-compose.gpu.yml" }
    @{ Src = "docker/.env.example";                      Dest = ".env.example" }
    @{ Src = "docker/postgresql.conf";                   Dest = "postgresql.conf" }
    @{ Src = "docker/scripts/migrate-database.ps1";      Dest = "scripts/migrate-database.ps1" }
@ -339,15 +402,19 @@ if (-not (Test-Path $envPath)) {
    $content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey"
    Set-Content -Path $envPath -Value $content -NoNewline
    Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
    Write-Info "Created $envPath"
 } else {
-    Write-Warn ".env already exists - keeping your existing configuration."
+    if ($PSBoundParameters.ContainsKey('Variant')) {
        Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
        Write-Info "Updated SurfSense image variant in existing $envPath"
    } else {
        Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
    }
 }
 # ── Start containers ────────────────────────────────────────────────────────
 Invoke-StaleZeroCacheCleanup
 if ($MigrationMode) {
    $envContent = Get-Content $envPath
    $DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
@ -405,31 +472,15 @@ if ($MigrationMode) {
    }
    Write-Step "Starting all SurfSense services"
-    Push-Location $InstallDir
+    Invoke-ComposeUpWait
-    Invoke-NativeSafe { docker compose up -d }
+    Write-Ok "All services started and healthy."
    Pop-Location
    Write-Ok "All containers started; waiting for stack to become healthy..."
    $waitResult = Wait-StackHealthy -TimeoutSec 300
    if (-not $waitResult.Ok) {
        Invoke-StackFailureReport -Result $waitResult
    }
    Write-Ok "All services healthy."
    Remove-Item $KeyFile -ErrorAction SilentlyContinue
 } else {
    Write-Step "Starting SurfSense"
-    Push-Location $InstallDir
+    Invoke-ComposeUpWait
-    Invoke-NativeSafe { docker compose up -d }
+    Write-Ok "All services started and healthy."
    Pop-Location
    Write-Ok "All containers started; waiting for stack to become healthy..."
    $waitResult = Wait-StackHealthy -TimeoutSec 300
    if (-not $waitResult.Ok) {
        Invoke-StackFailureReport -Result $waitResult
    }
    Write-Ok "All services healthy."
 }
 # ── Watchtower (auto-update) ────────────────────────────────────────────────
@ -461,7 +512,7 @@ if ($SetupWatchtower) {
        if ($LASTEXITCODE -eq 0) {
            Write-Ok "Watchtower started - labeled SurfSense containers will auto-update."
        } else {
-            Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d"
+            Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d --wait"
        }
    }
 } else {
@ -471,39 +522,26 @@ if ($SetupWatchtower) {
 # ── Done ────────────────────────────────────────────────────────────────────
 Write-Host ""
 Write-Host @"
 .d8888b.                    .d888 .d8888b.                                      
 d88P  Y88b                  d88P" d88P  Y88b                                     
 Y88b.                       888   Y88b.                                          
 "Y888b.   888  888 888d888 888888 "Y888b.    .d88b.  88888b.  .d8888b   .d88b.  
    "Y88b. 888  888 888P"   888       "Y88b. d8P  Y8b 888 "88b 88K      d8P  Y8b 
      "888 888  888 888     888         "888 88888888 888  888 "Y8888b. 88888888 
 Y88b  d88P Y88b 888 888     888   Y88b  d88P Y8b.     888  888      X88 Y8b.     
 "Y8888P"   "Y88888 888     888    "Y8888P"   "Y8888  888  888  88888P'  "Y8888  
 "@ -ForegroundColor White
 $versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
 if (-not $versionDisplay) { $versionDisplay = "latest" }
-Write-Host "         OSS Alternative to NotebookLM for Teams  [$versionDisplay]" -ForegroundColor Yellow
+$variantDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VARIANT=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
-Write-Host ("=" * 62) -ForegroundColor Cyan
+if (-not $variantDisplay) { $variantDisplay = "cpu" }
-Write-Host ""
+$wtHours = [math]::Floor($WatchtowerInterval / 3600)
 Write-Step "SurfSense is now installed [$versionDisplay]"
 Write-Info "  Frontend:  http://localhost:3929"
 Write-Info "  Backend:   http://localhost:8929"
 Write-Info "  API Docs:  http://localhost:8929/docs"
 Write-Info ""
 Write-Info "  Config:    $InstallDir\.env"
 Write-Info "  Variant:   $variantDisplay"
 Write-Info "  Logs:      cd $InstallDir; docker compose logs -f"
 Write-Info "  Stop:      cd $InstallDir; docker compose down"
-Write-Info "  Update:    cd $InstallDir; docker compose pull; docker compose up -d"
+Write-Info "  Update:    cd $InstallDir; docker compose pull; docker compose up -d --wait"
 Write-Info ""
 if ($SetupWatchtower) {
-    Write-Info "  Watchtower: auto-updates every ${wtHours}h (stop: docker rm -f $WatchtowerContainer)"
+    Write-Info "  Watchtower: auto-updates every ${wtHours}h (disable: docker rm -f $WatchtowerContainer)"
 } else {
    Write-Warn "  Watchtower skipped. For auto-updates, re-run without -NoWatchtower."
 }
--- a/docker/scripts/install.sh
+++ b/docker/scripts/install.sh
@ -8,6 +8,11 @@
 # Flags:
 #   --no-watchtower              Skip automatic Watchtower setup
 #   --watchtower-interval=SECS   Check interval in seconds (default: 86400 = 24h)
 #   --variant=cpu|cuda|cuda126   Select backend image variant
 #   --gpu                        Alias for --variant=cuda
 #   --cpu                        Alias for --variant=cpu
 #   --gpu-count=N|all            Number of GPUs to reserve when GPU is enabled
 #   --quiet                      Skip interactive prompts
 #
 # Handles two cases automatically:
 #   1. Fresh install        — no prior SurfSense data detected
@ -35,12 +40,22 @@ MIGRATION_MODE=false
 SETUP_WATCHTOWER=true
 WATCHTOWER_INTERVAL=86400
 WATCHTOWER_CONTAINER="watchtower"
 WATCHTOWER_EXPLICIT=false
 REQUESTED_VARIANT=""
 VARIANT_EXPLICIT=false
 GPU_COUNT=""
 QUIET=false
 # ── Parse flags ─────────────────────────────────────────────────────────────
 for arg in "$@"; do
    case "$arg" in
-        --no-watchtower) SETUP_WATCHTOWER=false ;;
+        --no-watchtower) SETUP_WATCHTOWER=false; WATCHTOWER_EXPLICIT=true ;;
        --watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;;
        --variant=*) REQUESTED_VARIANT="${arg#*=}"; VARIANT_EXPLICIT=true ;;
        --gpu) REQUESTED_VARIANT="cuda"; VARIANT_EXPLICIT=true ;;
        --cpu) REQUESTED_VARIANT="cpu"; VARIANT_EXPLICIT=true ;;
        --gpu-count=*) GPU_COUNT="${arg#*=}" ;;
        --quiet) QUIET=true ;;
    esac
 done
@ -57,6 +72,57 @@ warn()    { printf "${YELLOW}[SurfSense]${NC} %s\n"      "$1"; }
 error()   { printf "${RED}[SurfSense]${NC} ERROR: %s\n"  "$1" >&2; exit 1; }
 step()    { printf "\n${BOLD}${CYAN}── %s${NC}\n"        "$1"; }
 show_banner() {
    echo ""
    printf '\033[1;37m'
    cat << 'EOF'
 ███████╗██╗   ██╗██████╗ ███████╗███████╗███████╗███╗   ██╗███████╗███████╗
 ██╔════╝██║   ██║██╔══██╗██╔════╝██╔════╝██╔════╝████╗  ██║██╔════╝██╔════╝
 ███████╗██║   ██║██████╔╝█████╗  ███████╗█████╗  ██╔██╗ ██║███████╗█████╗  
 ╚════██║██║   ██║██╔══██╗██╔══╝  ╚════██║██╔══╝  ██║╚██╗██║╚════██║██╔══╝  
 ███████║╚██████╔╝██║  ██║██║     ███████║███████╗██║ ╚████║███████║███████╗
 ╚══════╝ ╚═════╝ ╚═╝  ╚═╝╚═╝     ╚══════╝╚══════╝╚═╝  ╚═══╝╚══════╝╚══════╝
 EOF
    printf "${YELLOW}         OSS Alternative to NotebookLM for Teams${NC}\n"
    printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n"
    info "This installer will create ${INSTALL_DIR}/ and start SurfSense with Docker Compose."
 }
 show_banner
 case "${REQUESTED_VARIANT}" in
    ""|cpu|cuda|cuda126) ;;
    *) error "Invalid --variant='${REQUESTED_VARIANT}'. Use cpu, cuda, or cuda126." ;;
 esac
 if [[ -n "${GPU_COUNT}" && ! "${GPU_COUNT}" =~ ^([0-9]+|all)$ ]]; then
    error "Invalid --gpu-count='${GPU_COUNT}'. Use a number or 'all'."
 fi
 # Ask on the controlling terminal whether Watchtower auto-updates should be
 # enabled, and record the answer in SETUP_WATCHTOWER.
 # No prompt is shown when --no-watchtower or --quiet was given, or when
 # /dev/tty cannot be both read and written. An empty answer or y/yes enables
 # Watchtower, n/no disables it, and anything else enables it with a warning.
 resolve_watchtower_preference() {
    if $WATCHTOWER_EXPLICIT; then
        return 0
    fi
    if $QUIET; then
        return 0
    fi
    if [[ ! ( -r /dev/tty && -w /dev/tty ) ]]; then
        return 0
    fi
    local answer
    {
        echo ""
        printf "${BOLD}${CYAN}Automatic updates${NC}\n"
        printf "Enable automatic daily updates with Watchtower? (may download several GB in the background) [Y/n]: "
    } > /dev/tty
    # A failed read (e.g. EOF) is treated like pressing Enter.
    if ! read -r answer < /dev/tty; then
        answer=""
    fi
    case "$answer" in
        [Nn]|[Nn][Oo])
            SETUP_WATCHTOWER=false
            ;;
        ""|[Yy]|[Yy][Ee][Ss])
            SETUP_WATCHTOWER=true
            ;;
        *)
            warn "Unrecognized choice '${answer}', enabling Watchtower by default. Use --no-watchtower to skip it." >&2
            SETUP_WATCHTOWER=true
            ;;
    esac
 }
 resolve_watchtower_preference
 # ── Pre-flight checks ────────────────────────────────────────────────────────
 step "Checking prerequisites"
@ -97,126 +163,11 @@ wait_for_pg() {
    success "PostgreSQL is ready."
 }
-# ── Stack health helpers ─────────────────────────────────────────────────────
+# ── Stack startup helper ─────────────────────────────────────────────────────
 # Enumerate compose services for project `surfsense` as `service|state|health|exitcode`
 # lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the
 # exact ordering of fields in `docker compose ps --format json` output.
 # Print one `service|state|health|exitcode` line per container that belongs
 # to the compose project `surfsense`.
 #
 # Each container is described with a single `docker inspect` call so that all
 # four fields come from the same snapshot (and so we spawn one process per
 # container instead of four). Containers whose inspect fails (e.g. removed
 # between `docker ps` and `docker inspect`) or that carry no compose service
 # label are skipped. `health` is empty for containers without a healthcheck.
 # Prints nothing and returns 0 when no containers are found.
 get_compose_services() {
    local format='{{index .Config.Labels "com.docker.compose.service"}}|{{.State.Status}}|{{if .State.Health}}{{.State.Health.Status}}{{end}}|{{.State.ExitCode}}'
    local containers container record
    containers=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true
    [[ -z "$containers" ]] && return 0
    while IFS= read -r container; do
        [[ -z "$container" ]] && continue
        record=$(docker inspect -f "$format" "$container" 2>/dev/null || echo "")
        # The service name is everything before the first `|`; an empty name
        # means the inspect failed or the label is missing.
        [[ -z "${record%%|*}" ]] && continue
        printf '%s\n' "$record"
    done <<< "$containers"
 }
 # Globals populated by wait_stack_healthy / consumed by stack_failure_report.
 STACK_BAD=()
 STACK_WAITING=()
 STACK_GOOD=()
 STACK_TIMEOUT=false
 # Poll the compose stack until every service is in a good state, one is in a
 # bad state, or the timeout elapses.
 #
 # Args:
 #   $1  timeout in seconds (default 300)
 # Returns:
 #   0  no service is bad or still waiting (STACK_GOOD is populated)
 #   1  a service is bad (STACK_BAD/STACK_WAITING/STACK_GOOD are populated), or
 #      the deadline passed (same globals populated, STACK_TIMEOUT=true)
 #
 # Classification, per line from get_compose_services:
 #   - `migrations` is expected to finish: exited with code 0 is good, exited
 #     with any other code is bad, anything else is still waiting.
 #   - other services: running with no healthcheck or `healthy` is good;
 #     `starting` or an unrecognized health value is waiting; `unhealthy`,
 #     `restarting` and `exited` are bad; any other state is waiting.
 #
 # Arrays are copied with the ${arr[@]+"${arr[@]}"} form everywhere so that an
 # empty array never trips "unbound variable" when the script runs under
 # `set -u` on bash releases older than 4.4.
 wait_stack_healthy() {
    local timeout_sec=${1:-300}
    local deadline=$(($(date +%s) + timeout_sec))
    local last_report=""
    local bad=()
    local waiting=()
    local good=()
    while [[ $(date +%s) -lt $deadline ]]; do
        local lines
        lines=$(get_compose_services)
        if [[ -z "$lines" ]]; then
            # No containers visible yet; give compose a moment to create them.
            sleep 3
            continue
        fi
        bad=()
        waiting=()
        good=()
        while IFS='|' read -r name state health code; do
            [[ -z "$name" ]] && continue
            if [[ "$name" == "migrations" ]]; then
                if [[ "$state" == "exited" && "$code" == "0" ]]; then
                    good+=("$name")
                elif [[ "$state" == "exited" ]]; then
                    bad+=("${name} (exit=${code})")
                else
                    waiting+=("${name} (${state})")
                fi
                continue
            fi
            if [[ "$state" == "running" ]]; then
                if [[ -z "$health" || "$health" == "healthy" ]]; then
                    good+=("$name")
                elif [[ "$health" == "starting" ]]; then
                    waiting+=("${name} (starting)")
                elif [[ "$health" == "unhealthy" ]]; then
                    bad+=("${name} (unhealthy)")
                else
                    waiting+=("${name} (${health})")
                fi
            elif [[ "$state" == "restarting" ]]; then
                bad+=("${name} (restarting)")
            elif [[ "$state" == "exited" ]]; then
                bad+=("${name} (exited, code=${code})")
            else
                waiting+=("${name} (${state})")
            fi
        done <<< "$lines"
        if (( ${#bad[@]} > 0 )); then
            # Fail fast: bad is non-empty here, but waiting/good may be empty.
            STACK_BAD=("${bad[@]}")
            STACK_WAITING=(${waiting[@]+"${waiting[@]}"})
            STACK_GOOD=(${good[@]+"${good[@]}"})
            return 1
        fi
        if (( ${#waiting[@]} == 0 )); then
            STACK_GOOD=(${good[@]+"${good[@]}"})
            return 0
        fi
        # Only log when the set of pending services changes, to avoid spam.
        local report="Waiting on: ${waiting[*]}"
        if [[ "$report" != "$last_report" ]]; then
            info "$report"
            last_report="$report"
        fi
        sleep 5
    done
    # Timed out. bad/waiting/good are declared at function scope, so they hold
    # the last observed classification (or are empty if nothing was observed).
    STACK_BAD=(${bad[@]+"${bad[@]}"})
    STACK_WAITING=(${waiting[@]+"${waiting[@]}"})
    STACK_GOOD=(${good[@]+"${good[@]}"})
    STACK_TIMEOUT=true
    return 1
 }
 stack_failure_report() {
    echo ""
    echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state."
    if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then
        echo "  Failed: ${STACK_BAD[*]}"
    fi
    if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then
        echo "  Stuck:  ${STACK_WAITING[*]}"
    fi
    echo ""
    info "Recent logs from migrations / zero-cache / backend:"
    (cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true
@ -224,36 +175,158 @@ stack_failure_report() {
    echo "Recovery hints:"
    echo "  1. Inspect migrations:   cd ${INSTALL_DIR} && ${DC} logs migrations"
    echo "  2. Verify publication:   cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
-    echo "  3. Hard reset zero db:   cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d"
+    echo "  3. Hard reset zero db:   cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d --wait"
    echo ""
    exit 1
 }
-# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not.
+compose_up_wait() {
-# That signals an install that predates the migrations-service fix; the old
+    local service="${1:-}"
-# replica may be half-initialized and would block zero-cache on next start.
+    if [[ -n "$service" ]]; then
-test_stale_zero_cache_volume() {
+        (cd "${INSTALL_DIR}" && ${DC} up -d --wait "$service") < /dev/null
-    local has_zc has_zi
+    else
-    has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true)
+        (cd "${INSTALL_DIR}" && ${DC} up -d --wait) < /dev/null
-    has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true)
+    fi
    [[ -n "$has_zc" && -z "$has_zi" ]]
 }
-invoke_stale_zero_cache_cleanup() {
+# ── Variant and .env helpers ─────────────────────────────────────────────────
-    if ! test_stale_zero_cache_volume; then
+
 # Set KEY=VALUE in a dotenv-style file.
 #
 # Args:
 #   $1  path of the file to edit
 #   $2  key (matched literally at the start of a line, followed by `=`)
 #   $3  value (written verbatim)
 #
 # Every existing `KEY=` line is replaced in place; if there is none, a
 # newline followed by `KEY=VALUE` is appended (the file is created if it does
 # not exist). The result is built in a temp file and moved over the original.
 # Returns non-zero, leaving the original untouched, if the rewrite fails.
 #
 # The key and value reach awk through ENVIRON rather than `-v` / a dynamic
 # regex: `-v` would interpret backslash escapes in the value, and a regex
 # match would let metacharacters in the key (e.g. `.`) match other keys.
 set_env_value() {
    local file="$1"
    local key="$2"
    local value="$3"
    local tmp
    tmp=$(mktemp) || return 1
    if SURFSENSE_ENV_KEY="$key" awk '
            BEGIN { prefix = ENVIRON["SURFSENSE_ENV_KEY"] "=" }
            index($0, prefix) == 1 { hit = 1; exit }
            END { exit !hit }
        ' "$file" 2>/dev/null; then
        if ! SURFSENSE_ENV_KEY="$key" SURFSENSE_ENV_VALUE="$value" awk '
                BEGIN { prefix = ENVIRON["SURFSENSE_ENV_KEY"] "="; value = ENVIRON["SURFSENSE_ENV_VALUE"] }
                index($0, prefix) == 1 { print prefix value; next }
                { print }
            ' "$file" > "$tmp"; then
            rm -f "$tmp"
            return 1
        fi
    else
        if [[ -e "$file" ]]; then
            if ! cp "$file" "$tmp"; then
                rm -f "$tmp"
                return 1
            fi
        fi
        printf '\n%s=%s\n' "$key" "$value" >> "$tmp"
    fi
    mv "$tmp" "$file"
 }
 # Delete every `KEY=...` line from a dotenv-style file.
 #
 # Args:
 #   $1  path of the file to edit
 #   $2  key (matched literally at the start of a line, followed by `=`)
 #
 # The filtered content is written to a temp file and moved over the original.
 # The key is compared with index() instead of a dynamic regex so that regex
 # metacharacters in the key cannot remove unrelated lines. Returns non-zero,
 # leaving the original untouched, if the file cannot be filtered.
 remove_env_value() {
    local file="$1"
    local key="$2"
    local tmp
    tmp=$(mktemp) || return 1
    if ! SURFSENSE_ENV_KEY="$key" awk '
            BEGIN { prefix = ENVIRON["SURFSENSE_ENV_KEY"] "=" }
            index($0, prefix) != 1 { print }
        ' "$file" > "$tmp"; then
        rm -f "$tmp"
        return 1
    fi
    mv "$tmp" "$file"
 }
 # Print the part of a dotted version string ($1) that precedes the first dot.
 version_major() {
    local version="$1"
    printf '%s' "$version" | cut -d '.' -f 1
 }
 # Print the CUDA image variant suited to the installed NVIDIA driver:
 # `cuda126` when the driver's major version is numeric and below 570,
 # otherwise `cuda` (including when the version cannot be determined).
 recommend_cuda_variant() {
    local reported major
    local choice='cuda'
    # Only the first reported GPU's driver version is considered.
    reported=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n 1 | tr -d '[:space:]' || true)
    major=$(version_major "$reported")
    # CUDA 12.8 generally requires an R570+ driver. Use CUDA 12.6 as the
    # compatibility fallback for older 12.x driver stacks and GPUs.
    if [[ "$major" =~ ^[0-9]+$ ]]; then
        if [[ "$major" -lt 570 ]]; then
            choice='cuda126'
        fi
    fi
    printf '%s' "$choice"
 }
 # Succeed when any of these hints is present: `docker info` output mentions
 # "nvidia" (case-insensitive), or `nvidia-ctk` / `nvidia-container-runtime`
 # resolves as a command.
 gpu_runtime_available() {
    if docker info 2>/dev/null | grep -qi 'nvidia'; then
        return 0
    fi
    if command -v nvidia-ctk >/dev/null 2>&1; then
        return 0
    fi
    command -v nvidia-container-runtime >/dev/null 2>&1
 }
 # Succeed only when `nvidia-smi` both resolves as a command and exits
 # successfully when run.
 host_has_nvidia_gpu() {
    if ! command -v nvidia-smi >/dev/null 2>&1; then
        return 1
    fi
    nvidia-smi >/dev/null 2>&1
 }
 resolve_variant() {
    local detected_variant="cpu"
    local has_gpu=false
    local has_runtime=false
    if host_has_nvidia_gpu; then
        has_gpu=true
        detected_variant=$(recommend_cuda_variant)
        if gpu_runtime_available; then
            has_runtime=true
        fi
    fi
    if $VARIANT_EXPLICIT; then
        if [[ "$REQUESTED_VARIANT" == "cpu" ]]; then
            printf 'cpu'
            return 0
        fi
        if ! $has_gpu; then
            warn "No NVIDIA GPU detected; falling back to CPU variant." >&2
            printf 'cpu'
            return 0
        fi
        if ! $has_runtime; then
            warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant." >&2
            warn "Install the toolkit before enabling SurfSense GPU acceleration." >&2
            printf 'cpu'
            return 0
        fi
        printf '%s' "$REQUESTED_VARIANT"
        return 0
    fi
    warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
    warn "predates the migrations-service fix. It may contain a half-initialized"
    warn "SQLite replica that would block zero-cache from starting."
    warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
    sleep 5
-    (cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true
+    if $has_gpu && ! $has_runtime; then
-    docker volume rm surfsense-zero-cache 2>/dev/null || true
+        warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant." >&2
-    success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
+    fi
    if $has_gpu && $has_runtime && ! $QUIET && [[ -r /dev/tty && -w /dev/tty ]]; then
        local choice
        echo "" > /dev/tty
        printf "${BOLD}${CYAN}SurfSense detected an NVIDIA GPU.${NC}\n" > /dev/tty
        printf "Use GPU acceleration? [Y/n]: " > /dev/tty
        read -r choice < /dev/tty || choice=""
        case "$choice" in
            "") printf '%s' "$detected_variant" ;;
            [Yy]|[Yy][Ee][Ss]) printf '%s' "$detected_variant" ;;
            [Nn]|[Nn][Oo]) printf 'cpu' ;;
            *) warn "Unrecognized choice '${choice}', using CPU variant." >&2; printf 'cpu' ;;
        esac
        return 0
    fi
    printf 'cpu'
 }
 # Write the chosen image variant into a .env file.
 #
 # Args:
 #   $1  path to the .env file
 #   $2  variant: `cpu` or a GPU variant name
 #   $3  "true" to allow modifying a file that already exists
 #
 # If the file exists and $3 is not "true", nothing is written: a warning and
 # a hint are printed and the function returns 0.
 # NOTE(review): because of that guard, a caller that has just created the
 # file and passes "false" will not get the variant written — confirm that
 # the fresh-install call site passes the intended flag.
 #
 # For `cpu`, SURFSENSE_VARIANT is set to empty and COMPOSE_FILE and
 # SURFSENSE_GPU_COUNT are removed. For any other variant, SURFSENSE_VARIANT
 # and COMPOSE_FILE are set, and SURFSENSE_GPU_COUNT is set only when the
 # global GPU_COUNT is non-empty. COMPOSE_PROFILES is removed in both cases.
 apply_variant_env() {
    local env_file="$1"
    local variant="$2"
    local allow_existing_update="$3"

    if [[ "$allow_existing_update" != "true" ]] && [[ -f "$env_file" ]]; then
        warn ".env already exists — keeping your existing configuration."
        info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in ${env_file}, then run ${DC} up -d --wait."
        return 0
    fi

    case "$variant" in
        cpu)
            set_env_value "$env_file" "SURFSENSE_VARIANT" ""
            remove_env_value "$env_file" "COMPOSE_FILE"
            remove_env_value "$env_file" "SURFSENSE_GPU_COUNT"
            ;;
        *)
            set_env_value "$env_file" "SURFSENSE_VARIANT" "$variant"
            set_env_value "$env_file" "COMPOSE_FILE" "docker-compose.yml:docker-compose.gpu.yml"
            if [[ -n "$GPU_COUNT" ]]; then
                set_env_value "$env_file" "SURFSENSE_GPU_COUNT" "$GPU_COUNT"
            fi
            ;;
    esac

    remove_env_value "$env_file" "COMPOSE_PROFILES"
 }
 SELECTED_VARIANT=$(resolve_variant)
 # ── Download files ───────────────────────────────────────────────────────────
 step "Downloading SurfSense files"
@ -263,6 +336,7 @@ mkdir -p "${INSTALL_DIR}/searxng"
 FILES=(
    "docker/docker-compose.yml:docker-compose.yml"
    "docker/docker-compose.gpu.yml:docker-compose.gpu.yml"
    "docker/.env.example:.env.example"
    "docker/postgresql.conf:postgresql.conf"
    "docker/scripts/migrate-database.sh:scripts/migrate-database.sh"
@ -336,15 +410,19 @@ if [ ! -f "${INSTALL_DIR}/.env" ]; then
    else
        sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
    fi
    apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
    info "Created ${INSTALL_DIR}/.env"
 else
-    warn ".env already exists — keeping your existing configuration."
+    if $VARIANT_EXPLICIT; then
        apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
        info "Updated SurfSense image variant in existing ${INSTALL_DIR}/.env"
    else
        apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
    fi
 fi
 # ── Start containers ─────────────────────────────────────────────────────────
 invoke_stale_zero_cache_cleanup
 if $MIGRATION_MODE; then
    # Read DB credentials from .env (fall back to defaults from docker-compose.yml)
    DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
@ -401,26 +479,20 @@ if $MIGRATION_MODE; then
    fi
    step "Starting all SurfSense services"
-    (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
+    if ! compose_up_wait; then
    success "All containers started; waiting for stack to become healthy..."
    if ! wait_stack_healthy 300; then
        stack_failure_report
    fi
-    success "All services healthy."
+    success "All services started and healthy."
    # Key file is no longer needed — SECRET_KEY is now in .env
    rm -f "${KEY_FILE}"
 else
    step "Starting SurfSense"
-    (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
+    if ! compose_up_wait; then
    success "All containers started; waiting for stack to become healthy..."
    if ! wait_stack_healthy 300; then
        stack_failure_report
    fi
-    success "All services healthy."
+    success "All services started and healthy."
 fi
 # ── Watchtower (auto-update) ─────────────────────────────────────────────────
@ -445,7 +517,7 @@ if $SETUP_WATCHTOWER; then
            --label-enable \
            --interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \
            && success "Watchtower started — labeled SurfSense containers will auto-update." \
-            || warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d"
+            || warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d --wait"
    fi
 else
    info "Skipping Watchtower setup (--no-watchtower flag)."
@ -454,38 +526,25 @@ fi
 # ── Done ─────────────────────────────────────────────────────────────────────
 echo ""
 printf '\033[1;37m'
 cat << 'EOF'
 .d8888b.                    .d888 .d8888b.                                      
 d88P  Y88b                  d88P" d88P  Y88b                                     
 Y88b.                       888   Y88b.                                          
 "Y888b.   888  888 888d888 888888 "Y888b.    .d88b.  88888b.  .d8888b   .d88b.  
    "Y88b. 888  888 888P"   888       "Y88b. d8P  Y8b 888 "88b 88K      d8P  Y8b 
      "888 888  888 888     888         "888 88888888 888  888 "Y8888b. 88888888 
 Y88b  d88P Y88b 888 888     888   Y88b  d88P Y8b.     888  888      X88 Y8b.     
 "Y8888P"   "Y88888 888     888    "Y8888P"   "Y8888  888  888  88888P'  "Y8888  
 EOF
 _version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
 _version_display="${_version_display:-latest}"
-printf "         OSS Alternative to NotebookLM for Teams  ${YELLOW}[%s]${NC}\n" "${_version_display}"
+_variant_display=$(grep '^SURFSENSE_VARIANT=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
-printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n"
+_variant_display="${_variant_display:-cpu}"
 step "SurfSense is now installed [${_version_display}]"
 info "  Frontend:  http://localhost:3929"
 info "  Backend:   http://localhost:8929"
 info "  API Docs:  http://localhost:8929/docs"
 info ""
 info "  Config:    ${INSTALL_DIR}/.env"
 info "  Variant:   ${_variant_display}"
 info "  Logs:      cd ${INSTALL_DIR} && ${DC} logs -f"
 info "  Stop:      cd ${INSTALL_DIR} && ${DC} down"
-info "  Update:    cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d"
+info "  Update:    cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d --wait"
 info ""
 if $SETUP_WATCHTOWER; then
-    info "  Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (stop: docker rm -f ${WATCHTOWER_CONTAINER})"
+    info "  Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (disable: docker rm -f ${WATCHTOWER_CONTAINER})"
 else
    warn "  Watchtower skipped. For auto-updates, re-run without --no-watchtower."
 fi
--- a/docs/chinese-llm-setup.md
+++ b/docs/chinese-llm-setup.md
@ -212,9 +212,9 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
 | 字段 | 值 | 说明 |
 |------|-----|------|
-| **Configuration Name** | `MiniMax M2.5` | 配置名称（自定义） |
+| **Configuration Name** | `MiniMax M3` | 配置名称（自定义） |
 | **Provider** | `MINIMAX` | 选择 MiniMax |
-| **Model Name** | `MiniMax-M2.5` | 推荐模型<br>其他选项: `MiniMax-M2.5-highspeed` |
+| **Model Name** | `MiniMax-M3` | 推荐模型<br>其他选项: `MiniMax-M2.7`、`MiniMax-M2.7-highspeed` |
 | **API Key** | `eyJ...` | 你的 MiniMax API Key |
 | **API Base URL** | `https://api.minimax.io/v1` | MiniMax API 地址 |
 | **Parameters** | `{"temperature": 1.0}` | 注意：temperature 必须在 (0.0, 1.0] 范围内，不能为 0 |
@ -222,22 +222,23 @@ API Base URL: https://open.bigmodel.cn/api/paas/v4
 ### 示例配置
 ```
-Configuration Name: MiniMax M2.5
+Configuration Name: MiniMax M3
 Provider: MINIMAX
-Model Name: MiniMax-M2.5
+Model Name: MiniMax-M3
 API Key: eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 API Base URL: https://api.minimax.io/v1
 ```
 ### 可用模型
- **MiniMax-M2.5**: 高性能通用模型，204K 上下文窗口（推荐）
+- **MiniMax-M3**: 旗舰模型，512K 上下文窗口（推荐）
- **MiniMax-M2.5-highspeed**: 高速推理版本，204K 上下文窗口
+- **MiniMax-M2.7**: 上一代通用模型，204K 上下文窗口
 - **MiniMax-M2.7-highspeed**: 上一代高速推理版本，204K 上下文窗口
 ### 注意事项
 - **temperature 参数**: MiniMax 要求 temperature 必须在 (0.0, 1.0] 范围内，不能设置为 0。建议使用 1.0。
- 两个模型都支持 204K 超长上下文窗口，适合处理长文本任务。
+- M3 支持 512K 超长上下文，M2.7 系列保留 204K，适合按需求选择。
 ### 定价
 - 请访问 [MiniMax 定价页面](https://platform.minimaxi.com/document/Price) 查看最新价格
@ -315,8 +316,8 @@ docker compose logs backend | grep -i "error"
 |---------|---------|------|
 | **文档摘要** | Qwen-Plus, GLM-4 | 平衡性能和成本 |
 | **代码分析** | DeepSeek-Coder | 代码专用 |
-| **长文本处理** | Kimi 128K, MiniMax-M2.5 (204K) | 超长上下文 |
+| **长文本处理** | Kimi 128K, MiniMax-M3 (512K) | 超长上下文 |
-| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.5-highspeed | 速度优先 |
+| **快速响应** | Qwen-Turbo, GLM-4-Flash, MiniMax-M2.7-highspeed | 速度优先 |
 ### 2. 成本优化
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -3,18 +3,46 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
 # Deployment environment: dev or production
 SURFSENSE_ENV=dev
-#Celery Config
+# Redis (single endpoint for Celery broker/result backend + app features)
-CELERY_BROKER_URL=redis://localhost:6379/0
+REDIS_URL=redis://localhost:6379/0
-CELERY_RESULT_BACKEND=redis://localhost:6379/0
+# Optional: override individually only to split Redis across instances.
 # Each defaults to REDIS_URL when unset.
 # CELERY_BROKER_URL=redis://localhost:6379/0
 # CELERY_RESULT_BACKEND=redis://localhost:6379/0
 # REDIS_APP_URL=redis://localhost:6379/0
 # Optional: isolate queues when sharing Redis with other apps
 CELERY_TASK_DEFAULT_QUEUE=surfsense
 # Redis for app-level features (heartbeats, podcast markers)
 # Defaults to CELERY_BROKER_URL when not set
 REDIS_APP_URL=redis://localhost:6379/0
 # Optional: TTL in seconds for connector indexing lock key
 # CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800
 # Messaging Gateway (global)
 # GATEWAY_ENABLED: master switch for ALL messaging gateway channels (Telegram, WhatsApp,
 # Slack, Discord). When FALSE, no gateway background workers/supervisors start and all
 # gateway HTTP routes (webhooks, OAuth callbacks, pairing) return 404. Set per-channel
 # flags below to control individual platforms once the gateway is enabled.
 GATEWAY_ENABLED=TRUE
 # Telegram Gateway
 # TELEGRAM_WEBHOOK_SECRET must be 1-256 chars and contain only A-Z, a-z, 0-9, _ or -
 # GATEWAY_TELEGRAM_INTAKE_MODE: `webhook` for production, `longpoll` for single-replica self-host fallback, `disabled` to skip Telegram intake
 TELEGRAM_SHARED_BOT_TOKEN=
 TELEGRAM_SHARED_BOT_USERNAME=
 TELEGRAM_WEBHOOK_SECRET=
 GATEWAY_BASE_URL=http://localhost:8000
 GATEWAY_TELEGRAM_INTAKE_MODE=webhook
 # WhatsApp Gateway
 # GATEWAY_WHATSAPP_INTAKE_MODE: `cloud` for Meta Cloud API, `baileys` for self-hosted bridge, `disabled` to skip WhatsApp intake
 GATEWAY_WHATSAPP_INTAKE_MODE=disabled
 WHATSAPP_SHARED_BUSINESS_TOKEN=
 WHATSAPP_SHARED_PHONE_NUMBER_ID=
 WHATSAPP_SHARED_DISPLAY_PHONE_NUMBER=
 WHATSAPP_SHARED_WABA_ID=
 WHATSAPP_GRAPH_API_VERSION=v25.0
 WHATSAPP_WEBHOOK_VERIFY_TOKEN=
 WHATSAPP_WEBHOOK_APP_SECRET=
 WHATSAPP_BRIDGE_URL=http://whatsapp-bridge:9929
 # Platform Web Search (SearXNG)
 # Set this to enable built-in web search. Docker Compose sets it automatically.
 # Only uncomment if running the backend outside Docker (e.g. uvicorn on host).
@ -64,8 +92,6 @@ STRIPE_PAGE_BUYING_ENABLED=TRUE
 STRIPE_TOKEN_BUYING_ENABLED=FALSE
 STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
 STRIPE_CREDIT_MICROS_PER_UNIT=1000000
 # DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
 # STRIPE_TOKENS_PER_UNIT=1000000
 # Periodic Stripe safety net for purchases left in PENDING (minutes old)
 STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
@ -98,11 +124,14 @@ CLICKUP_CLIENT_ID=your_clickup_client_id_here
 CLICKUP_CLIENT_SECRET=your_clickup_client_secret_here
 CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback
-# Discord OAuth Configuration
+# Discord OAuth / Gateway Configuration
 # The Discord connector and Discord gateway use the same Discord application/bot.
 DISCORD_CLIENT_ID=your_discord_client_id_here
 DISCORD_CLIENT_SECRET=your_discord_client_secret_here
 DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
 DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal
 GATEWAY_DISCORD_ENABLED=FALSE
 GATEWAY_DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/gateway/discord/callback
 # Atlassian OAuth Configuration (Jira & Confluence)
 ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here
@ -120,10 +149,14 @@ NOTION_CLIENT_ID=your_notion_client_id_here
 NOTION_CLIENT_SECRET=your_notion_client_secret_here
 NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback
-# Slack OAuth Configuration
+# Slack OAuth / Gateway Configuration
 # The Slack connector and Slack gateway can use the same Slack app client ID/secret.
 SLACK_CLIENT_ID=your_slack_client_id_here
 SLACK_CLIENT_SECRET=your_slack_client_secret_here
 SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback
 GATEWAY_SLACK_ENABLED=FALSE
 GATEWAY_SLACK_SIGNING_SECRET=your_slack_signing_secret_here
 GATEWAY_SLACK_REDIRECT_URI=http://localhost:8000/api/v1/gateway/slack/callback
 # Microsoft OAuth (Teams & OneDrive)
 MICROSOFT_CLIENT_ID=your_microsoft_client_id_here
@ -197,8 +230,6 @@ PAGES_LIMIT=500
 # models bill proportionally. Applies only to models with
 # billing_tier=premium in global_llm_config.yaml.
 PREMIUM_CREDIT_MICROS_LIMIT=5000000
 # DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
 # PREMIUM_TOKEN_LIMIT=5000000
 # Safety ceiling on per-call premium reservation, in micro-USD.
 # stream_new_chat estimates an upper-bound cost from the model's
@ -246,17 +277,19 @@ TURNSTILE_ENABLED=FALSE
 TURNSTILE_SECRET_KEY=
 # Proxy provider selection. Selects a ProxyProvider implementation registered in
 # app/utils/proxy/registry.py. Default: "anonymous_proxies". Add new vendors there.
 # PROXY_PROVIDER=anonymous_proxies
 # Residential Proxy Configuration (anonymous-proxies.net)
 # Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
-# Leave commented out to disable proxying.
+# Consumed by the "anonymous_proxies" provider. Leave commented out to disable proxying.
 # RESIDENTIAL_PROXY_USERNAME=your_proxy_username
 # RESIDENTIAL_PROXY_PASSWORD=your_proxy_password
 # RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230
 # RESIDENTIAL_PROXY_LOCATION=
 # RESIDENTIAL_PROXY_TYPE=1
 FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
 # File Parser Service
 ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
 UNSTRUCTURED_API_KEY=Tpu3P0U8iy
@ -265,6 +298,16 @@ LLAMA_CLOUD_API_KEY=llx-nnn
 # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
 # AZURE_DI_KEY=your-key
 # Original File Storage
 # Where to persist the original bytes of uploaded documents (for download today,
 # redaction / form-filling later). "local" needs no cloud creds and is the dev default.
 FILE_STORAGE_BACKEND=local
 # Local backend: directory for stored files (defaults to surfsense_backend/.local_object_store)
 # FILE_STORAGE_LOCAL_PATH=/var/lib/surfsense/object-store
 # Azure Blob backend (set FILE_STORAGE_BACKEND=azure):
 # AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
 # AZURE_STORAGE_CONTAINER=surfsense-documents
 # Daytona Sandbox (isolated code execution)
 # DAYTONA_SANDBOX_ENABLED=FALSE
 # DAYTONA_API_KEY=your-daytona-api-key
@ -285,9 +328,6 @@ LANGSMITH_PROJECT=surfsense
 # =============================================================================
 # OPTIONAL: New-chat agent feature flags
 # =============================================================================
 # Multi-agent orchestrator switch for authenticated chat streaming.
 # MULTI_AGENT_CHAT_ENABLED=false
 # Master kill-switch — when true, every flag below is forced OFF.
 # SURFSENSE_DISABLE_NEW_AGENT_STACK=false
@ -322,6 +362,13 @@ LANGSMITH_PROJECT=surfsense
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
 # SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
 # KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
 # KB content on demand via the `search_knowledge_base` tool and skips the
 # expensive per-turn pre-injection (planner LLM + embed + hybrid search,
 # ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
 # restore the original eager `<priority_documents>` pre-injection.
 # SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
 # Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
 # SURFSENSE_ENABLE_REVERT_ROUTE=false        # Backend-only; flip when UI ships
@ -342,6 +389,15 @@ LANGSMITH_PROJECT=surfsense
 # rollback if you suspect cache-related staleness.
 # SURFSENSE_ENABLE_AGENT_CACHE=true
 # Cross-thread reuse (default ON). Drops thread_id from the cache key so a
 # returning user's NEW chats (same user + search space + config + visibility)
 # hit the already-compiled graph instead of paying a fresh ~4-5s compile —
 # turning a cold first turn into a warm one. Safe because ActionLog,
 # KB-persistence, and the deliverables tools now resolve the chat thread from
 # the live RunnableConfig at call time rather than a build-time closure. Flip
 # OFF to fall back to a per-thread cache key (instant rollback).
 # SURFSENSE_ENABLE_CROSS_THREAD_AGENT_CACHE=true
 # Cache capacity (max number of compiled-agent entries kept in memory)
 # and TTL per entry (seconds). Working set is typically one entry per
 # active thread on this replica; tune up for very large deployments.
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -2,6 +2,7 @@
 .venv
 venv/
 data/
 .local_object_store/
 __pycache__/
 .flashrank_cache
 surf_new_backend.egg-info/
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -1,3 +1,4 @@
 # syntax=docker.io/docker/dockerfile:1
 # =============================================================================
 # SurfSense Backend — Multi-stage Dockerfile
 # =============================================================================
@ -61,15 +62,25 @@ COPY pyproject.toml uv.lock ./
 # Exporting the lock to requirements.txt and feeding it to `uv pip install`
 # pins every transitive package to the exact version captured in uv.lock.
 #
-# Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
+# Note on torch/CUDA: the export must always select either the cpu or CUDA
-# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
+# extra declared in pyproject.toml. A no-extra export would resolve torch from
-# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
+# PyPI on Linux, which currently pulls CUDA-enabled wheels and nvidia-* deps.
-# captured in uv.lock). If a specific CUDA version is needed, wire it through
+# Keep CUDA version selection in [tool.uv.sources] so uv.lock remains the
-# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
+# source of truth. The install step also needs the matching PyTorch index,
 # because requirements.txt preserves the +cpu/+cu wheel pins but not uv's
 # package source metadata.
 ARG USE_CUDA=false
 ARG CUDA_EXTRA=cu128
 RUN pip install --no-cache-dir uv && \
    if [ "$USE_CUDA" = "true" ]; then EXTRA="$CUDA_EXTRA"; else EXTRA="cpu"; fi && \
    TORCH_INDEX="https://download.pytorch.org/whl/${EXTRA}" && \
    uv export --frozen --no-dev --no-hashes --no-emit-project \
        --extra "$EXTRA" \
        --format requirements-txt -o /tmp/requirements.txt && \
-    uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
+    uv pip install --system --no-cache-dir \
        --index "$TORCH_INDEX" \
        --index-strategy unsafe-best-match \
        -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt
@ -94,10 +105,14 @@ RUN printf '%s\n' \
    | python || true
 ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
+RUN --mount=type=secret,id=HF_TOKEN \
    HF_TOKEN="$(cat /run/secrets/HF_TOKEN 2>/dev/null || true)" \
    python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
-# Install Playwright browsers (the playwright python package itself is in deps)
+# Install Scrapling's browser engines (patchright Chromium + Camoufox).
-RUN playwright install chromium --with-deps
+# Scrapling pulls playwright/patchright via the `fetchers` extra; `scrapling install`
 # downloads the matching browser binaries used by DynamicFetcher/StealthyFetcher.
 RUN scrapling install
 # Shared temp directory for file uploads between API and Worker containers.
 # Python's tempfile module uses TMPDIR, so uploaded files land here.
--- a/surfsense_backend/alembic/env.py
+++ b/surfsense_backend/alembic/env.py
@ -3,6 +3,7 @@ import os
 import sys
 from logging.config import fileConfig
 import sqlalchemy as sa
 from sqlalchemy import pool
 from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import async_engine_from_config
@ -36,6 +37,9 @@ if config.config_file_name is not None:
 # target_metadata = mymodel.Base.metadata
 target_metadata = Base.metadata
 MIGRATION_ADVISORY_LOCK_NAMESPACE = "surfsense"
 MIGRATION_ADVISORY_LOCK_NAME = "alembic_migrations"
 # other values from the config, defined by the needs of env.py,
 # can be acquired:
 # my_important_option = config.get_main_option("my_important_option")
@ -73,8 +77,22 @@ def do_run_migrations(connection: Connection) -> None:
        transaction_per_migration=True,
    )
-    with context.begin_transaction():
+    lock_params = {
-        context.run_migrations()
+        "namespace": MIGRATION_ADVISORY_LOCK_NAMESPACE,
        "name": MIGRATION_ADVISORY_LOCK_NAME,
    }
    connection.execute(
        sa.text("SELECT pg_advisory_lock(hashtext(:namespace), hashtext(:name))"),
        lock_params,
    )
    try:
        with context.begin_transaction():
            context.run_migrations()
    finally:
        connection.execute(
            sa.text("SELECT pg_advisory_unlock(hashtext(:namespace), hashtext(:name))"),
            lock_params,
        )
 async def run_async_migrations() -> None:
--- a/surfsense_backend/alembic/versions/143_force_zero_publication_resync.py
+++ b/surfsense_backend/alembic/versions/143_force_zero_publication_resync.py
@ -47,7 +47,6 @@ depends_on: str | Sequence[str] | None = None
 PUBLICATION_NAME = "zero_publication"
 # Must stay in sync with the column lists in migrations 117 / 139 / 140.
 DOCUMENT_COLS = [
    "id",
    "title",
--- a/surfsense_backend/alembic/versions/148_add_automation_runs_to_zero_publication.py
+++ b/surfsense_backend/alembic/versions/148_add_automation_runs_to_zero_publication.py
@ -0,0 +1,175 @@
 """add automation_runs to zero_publication with thin column list
 Publishes ``automation_runs`` so the dashboard can replace polling with a
 live run status + per-step ticker. Only the columns the list and ticker
 read are exposed (``id, automation_id, trigger_id, status, step_results,
 started_at, finished_at, created_at``); heavy JSONB
 (``definition_snapshot``, ``inputs``, ``output``, ``artifacts``, ``error``)
 stays on REST and is fetched lazily on detail expand.
 Uses the canonical ``ALTER PUBLICATION ... SET TABLE`` + ``COMMENT``
 bookend pattern (see migration 143) -- the shape Zero ``>=1.0`` requires
 to fire its schema-change hook. Existing tables are re-emitted unchanged.
 Revision ID: 148
 Revises: 147
 """
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "148"
 down_revision: str | None = "147"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 PUBLICATION_NAME = "zero_publication"
 # Mirrors migration 143. Kept in sync explicitly: any change to these lists
 # must be re-emitted in a new resync migration with COMMENT bookends.
 DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
 ]
 USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
 ]
 # Thin set: status + lightweight progress only. Heavy JSONB stays on REST.
 AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
 ]
 def _has_zero_version(conn, table: str) -> bool:
     """Return True when ``table`` in the current schema has a ``_0_version`` column.
     The lookup is pinned to ``current_schema()`` so that a same-named table in
     another schema cannot produce a false positive, which would make the
     publication DDL name a column the published table does not have.
     Args:
         conn: Connection used to run the catalog query.
         table: Unqualified table name to inspect.
     """
     row = conn.execute(
         sa.text(
             "SELECT 1 FROM information_schema.columns "
             "WHERE table_schema = current_schema() "
             "AND table_name = :tbl AND column_name = '_0_version'"
         ),
         {"tbl": table},
     ).fetchone()
     return row is not None
 def _build_set_table_ddl(
     *, documents_has_zero_ver: bool, user_has_zero_ver: bool
 ) -> str:
     """Compose the ``ALTER PUBLICATION ... SET TABLE`` statement for this revision.
     ``documents`` and ``"user"`` are published with explicit column lists, each
     extended with ``"_0_version"`` when the matching flag is set;
     ``automation_runs`` is published with ``AUTOMATION_RUN_COLS`` and the
     remaining tables are listed without a column list.
     """
     zero_version_col = ['"_0_version"']
     document_columns = ", ".join(
         DOCUMENT_COLS + (zero_version_col if documents_has_zero_ver else [])
     )
     user_columns = ", ".join(
         USER_COLS + (zero_version_col if user_has_zero_ver else [])
     )
     automation_run_columns = ", ".join(AUTOMATION_RUN_COLS)
     published_tables = [
         "notifications",
         "documents (" + document_columns + ")",
         "folders",
         "search_source_connectors",
         "new_chat_messages",
         "chat_comments",
         "chat_session_state",
         '"user" (' + user_columns + ")",
         "automation_runs (" + automation_run_columns + ")",
     ]
     return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(
         published_tables
     )
 def upgrade() -> None:
     """Add ``automation_runs`` to the publication by re-emitting its table list.
     Returns without changes when the publication does not exist.
     """
     conn = op.get_bind()
     publication_row = conn.execute(
         sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
         {"name": PUBLICATION_NAME},
     ).fetchone()
     if not publication_row:
         return
     set_table_ddl = _build_set_table_ddl(
         documents_has_zero_ver=_has_zero_version(conn, "documents"),
         user_has_zero_ver=_has_zero_version(conn, "user"),
     )
     bookended_statements = (
         f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-resync'",
         set_table_ddl,
         f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-resync'",
     )
     # The COMMENT / ALTER / COMMENT statements run inside a single
     # transaction (a SAVEPOINT when one is already open) so Zero observes
     # them as one schema-change event, as in migrations 117 / 139 / 140 / 143.
     tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
     with tx:
         for statement in bookended_statements:
             conn.execute(sa.text(statement))
 def downgrade() -> None:
     """Re-emit migration 143's shape (no automation_runs)."""
     conn = op.get_bind()
     publication_row = conn.execute(
         sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
         {"name": PUBLICATION_NAME},
     ).fetchone()
     if not publication_row:
         return
     zero_version_col = ['"_0_version"']
     document_columns = ", ".join(
         DOCUMENT_COLS
         + (zero_version_col if _has_zero_version(conn, "documents") else [])
     )
     user_columns = ", ".join(
         USER_COLS + (zero_version_col if _has_zero_version(conn, "user") else [])
     )
     published_tables = [
         "notifications",
         "documents (" + document_columns + ")",
         "folders",
         "search_source_connectors",
         "new_chat_messages",
         "chat_comments",
         "chat_session_state",
         '"user" (' + user_columns + ")",
     ]
     bookended_statements = (
         f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-148-downgrade'",
         f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE "
         + ", ".join(published_tables),
         f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-148-downgrade'",
     )
     # Same bookend pattern as upgrade(): all three statements share one
     # transaction, or a SAVEPOINT when a transaction is already open.
     tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
     with tx:
         for statement in bookended_statements:
             conn.execute(sa.text(statement))
--- a/surfsense_backend/alembic/versions/149_add_gateway_tables.py
+++ b/surfsense_backend/alembic/versions/149_add_gateway_tables.py
@ -0,0 +1,667 @@
 """add external chat surface tables
 Revision ID: 149
 Revises: 148
 Create Date: 2026-05-27
 Adds the lean external chat surface schema:
 * external_chat_accounts
 * external_chat_bindings
 * external_chat_inbound_events
 External chat surfaces store Telegram-originated conversations in the existing
 chat tables. This migration adds ``source`` to ``new_chat_threads`` and
 ``new_chat_messages`` as UI metadata while publishing all chat-message sources
 through Zero so a future SurfSense UI layer can render external chats. External
 chat adapter tables are served through REST in v1, so they are intentionally not
 added to ``zero_publication``.
 """
 from __future__ import annotations
 from collections.abc import Sequence
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 from alembic import op
 revision: str = "149"
 down_revision: str | None = "148"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 # PostgreSQL publication whose table list this migration re-emits.
 PUBLICATION_NAME = "zero_publication"
 # Column lists below are identical to the ones declared in migration 148;
 # this migration re-emits the publication with the same shape.
 # Columns of ``documents`` exposed through the publication.
 DOCUMENT_COLS = [
     "id",
     "title",
     "document_type",
     "search_space_id",
     "folder_id",
     "created_by_id",
     "status",
     "created_at",
     "updated_at",
 ]
 # Columns of ``"user"`` exposed through the publication.
 USER_COLS = [
     "id",
     "pages_limit",
     "pages_used",
     "premium_credit_micros_limit",
     "premium_credit_micros_used",
 ]
 # Columns of ``automation_runs`` exposed through the publication.
 AUTOMATION_RUN_COLS = [
     "id",
     "automation_id",
     "trigger_id",
     "status",
     "step_results",
     "started_at",
     "finished_at",
     "created_at",
 ]
 def _has_zero_version(conn, table: str) -> bool:
     """Return True when ``table`` in the current schema has a ``_0_version`` column.
     Scoped to ``current_schema()`` like the other existence helpers in this
     migration, so a same-named table in another schema cannot cause the
     publication DDL to list a column the published table does not have.
     Args:
         conn: Connection used to run the catalog query.
         table: Unqualified table name to inspect.
     """
     row = conn.execute(
         sa.text(
             "SELECT 1 FROM information_schema.columns "
             "WHERE table_schema = current_schema() "
             "AND table_name = :tbl AND column_name = '_0_version'"
         ),
         {"tbl": table},
     ).fetchone()
     return row is not None
 def _cols(columns: list[str]) -> str:
    return ", ".join(columns)
 def _table_exists(conn, table: str) -> bool:
     """Return True when ``table`` exists in the current schema."""
     lookup = sa.text(
         "SELECT 1 FROM information_schema.tables "
         "WHERE table_schema = current_schema() AND table_name = :tbl"
     )
     row = conn.execute(lookup, {"tbl": table}).fetchone()
     return row is not None
 def _column_exists(conn, table: str, column: str) -> bool:
     """Return True when ``table`` in the current schema has a column named ``column``."""
     lookup = sa.text(
         "SELECT 1 FROM information_schema.columns "
         "WHERE table_schema = current_schema() "
         "AND table_name = :tbl AND column_name = :col"
     )
     row = conn.execute(lookup, {"tbl": table, "col": column}).fetchone()
     return row is not None
 def _index_exists(conn, index_name: str) -> bool:
     """Return True when an index named ``index_name`` exists in the current schema."""
     lookup = sa.text(
         "SELECT 1 FROM pg_indexes "
         "WHERE schemaname = current_schema() AND indexname = :name"
     )
     row = conn.execute(lookup, {"name": index_name}).fetchone()
     return row is not None
 def _constraint_exists(conn, table: str, constraint_name: str) -> bool:
     """Return True when ``table`` in the current schema has the named constraint."""
     lookup = sa.text(
         "SELECT 1 FROM information_schema.table_constraints "
         "WHERE table_schema = current_schema() "
         "AND table_name = :tbl AND constraint_name = :name"
     )
     row = conn.execute(lookup, {"tbl": table, "name": constraint_name}).fetchone()
     return row is not None
 def _drop_index_if_exists(index_name: str, table_name: str) -> None:
     """Drop ``index_name`` on ``table_name``; do nothing when the index is absent."""
     if not _index_exists(op.get_bind(), index_name):
         return
     op.drop_index(index_name, table_name=table_name)
 def _drop_column_if_exists(table_name: str, column_name: str) -> None:
     """Drop ``column_name`` from ``table_name``; do nothing when the column is absent."""
     if not _column_exists(op.get_bind(), table_name, column_name):
         return
     op.drop_column(table_name, column_name)
 def _build_set_table_ddl(
     *, documents_has_zero_ver: bool, user_has_zero_ver: bool
 ) -> str:
     """Compose the ``ALTER PUBLICATION ... SET TABLE`` statement for this revision.
     ``documents`` and ``"user"`` get explicit column lists, each extended with
     ``"_0_version"`` when the matching flag is set; ``automation_runs`` uses
     ``AUTOMATION_RUN_COLS`` and the remaining tables are listed without a
     column list.
     """
     zero_version_col = ['"_0_version"']
     document_columns = DOCUMENT_COLS + (
         zero_version_col if documents_has_zero_ver else []
     )
     user_columns = USER_COLS + (zero_version_col if user_has_zero_ver else [])
     published_tables = [
         "notifications",
         "documents (" + _cols(document_columns) + ")",
         "folders",
         "search_source_connectors",
         "new_chat_messages",
         "chat_comments",
         "chat_session_state",
         '"user" (' + _cols(user_columns) + ")",
         "automation_runs (" + _cols(AUTOMATION_RUN_COLS) + ")",
     ]
     return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(
         published_tables
     )
 def _create_enum(name: str, values: tuple[str, ...]) -> postgresql.ENUM:
     """Create the PostgreSQL enum ``name`` if missing and return a column type for it.
     The returned type is built with ``create_type=False`` so that using it in
     ``op.create_table`` does not try to create the enum a second time.
     """
     bind = op.get_bind()
     postgresql.ENUM(*values, name=name).create(bind, checkfirst=True)
     column_type = postgresql.ENUM(*values, name=name, create_type=False)
     return column_type
 def upgrade() -> None:
     """Create the external chat tables and extend the chat tables with source metadata.
     Steps, in order:
     * create the ``external_chat_*`` enums (skipped when they already exist);
     * create ``external_chat_accounts``, ``external_chat_bindings`` and
       ``external_chat_inbound_events`` with their indexes;
     * add ``source`` / ``external_chat_binding_id`` to ``new_chat_threads`` and
       ``source`` / ``platform_metadata`` to ``new_chat_messages``;
     * set ``REPLICA IDENTITY FULL`` on ``new_chat_messages``;
     * re-emit the publication table list between two COMMENT bookends when the
       publication exists.
     Tables, columns and the foreign key are guarded by existence checks and
     indexes use ``if_not_exists`` so the migration can be re-run on a
     partially migrated database.
     NOTE(review): the timestamp defaults use ``now() AT TIME ZONE 'utc'`` on
     ``timestamptz`` columns; that expression yields a zone-less timestamp that
     PostgreSQL re-interprets in the session time zone, so it presumably relies
     on sessions running in UTC -- confirm against the ORM models.
     """
     conn = op.get_bind()
     # --- enum types ---------------------------------------------------------
     external_chat_platform_enum = _create_enum(
         "external_chat_platform", ("telegram", "whatsapp", "signal")
     )
     external_chat_account_mode_enum = _create_enum(
         "external_chat_account_mode", ("cloud_shared", "self_host_byo")
     )
     external_chat_health_status_enum = _create_enum(
         "external_chat_health_status", ("unknown", "ok", "failing")
     )
     external_chat_binding_state_enum = _create_enum(
         "external_chat_binding_state", ("pending", "bound", "revoked", "suspended")
     )
     external_chat_peer_kind_enum = _create_enum(
         "external_chat_peer_kind", ("direct", "group", "channel", "unknown")
     )
     external_chat_event_kind_enum = _create_enum(
         "external_chat_event_kind",
         ("message", "edited_message", "callback_query", "other"),
     )
     external_chat_event_status_enum = _create_enum(
         "external_chat_event_status",
         ("received", "processing", "processed", "ignored", "failed"),
     )
     # --- external_chat_accounts ---------------------------------------------
     if not _table_exists(conn, "external_chat_accounts"):
         op.create_table(
             "external_chat_accounts",
             sa.Column("id", sa.BigInteger(), primary_key=True),
             sa.Column("platform", external_chat_platform_enum, nullable=False),
             sa.Column("mode", external_chat_account_mode_enum, nullable=False),
             sa.Column("owner_user_id", postgresql.UUID(as_uuid=True), nullable=True),
             sa.Column("owner_search_space_id", sa.Integer(), nullable=True),
             sa.Column(
                 "is_system_account",
                 sa.Boolean(),
                 nullable=False,
                 server_default="false",
             ),
             sa.Column("encrypted_credentials", sa.Text(), nullable=True),
             sa.Column("bot_username", sa.String(255), nullable=True),
             sa.Column("webhook_secret", sa.String(64), nullable=True),
             sa.Column(
                 "cursor_state",
                 postgresql.JSONB(astext_type=sa.Text()),
                 nullable=False,
                 server_default=sa.text("'{}'::jsonb"),
             ),
             sa.Column(
                 "health_status",
                 external_chat_health_status_enum,
                 nullable=False,
                 server_default="unknown",
             ),
             sa.Column(
                 "last_health_check_at", sa.TIMESTAMP(timezone=True), nullable=True
             ),
             sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
             sa.Column("suspended_reason", sa.Text(), nullable=True),
             sa.Column(
                 "created_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             sa.Column(
                 "updated_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             # System accounts have no owner; every other account must have one.
             sa.CheckConstraint(
                 "(is_system_account = true AND owner_user_id IS NULL) OR "
                 "(is_system_account = false AND owner_user_id IS NOT NULL)",
                 name="ck_external_chat_accounts_owner_shape",
             ),
             sa.ForeignKeyConstraint(["owner_user_id"], ["user.id"], ondelete="CASCADE"),
             sa.ForeignKeyConstraint(
                 ["owner_search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
             ),
         )
     # One non-system account per (owner, platform).
     op.create_index(
         "uq_external_chat_accounts_owner_platform",
         "external_chat_accounts",
         ["owner_user_id", "platform"],
         unique=True,
         postgresql_where=sa.text("is_system_account = false"),
         if_not_exists=True,
     )
     # One system account per platform.
     op.create_index(
         "uq_external_chat_accounts_system_platform",
         "external_chat_accounts",
         ["platform"],
         unique=True,
         postgresql_where=sa.text("is_system_account = true"),
         if_not_exists=True,
     )
     op.create_index(
         "uq_external_chat_accounts_webhook_secret",
         "external_chat_accounts",
         ["webhook_secret"],
         unique=True,
         postgresql_where=sa.text("webhook_secret IS NOT NULL"),
         if_not_exists=True,
     )
     # --- external_chat_bindings ---------------------------------------------
     if not _table_exists(conn, "external_chat_bindings"):
         op.create_table(
             "external_chat_bindings",
             sa.Column("id", sa.BigInteger(), primary_key=True),
             sa.Column("account_id", sa.BigInteger(), nullable=False),
             sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
             sa.Column("search_space_id", sa.Integer(), nullable=False),
             sa.Column(
                 "state",
                 external_chat_binding_state_enum,
                 nullable=False,
                 server_default="pending",
             ),
             sa.Column("pairing_code", sa.Text(), nullable=True),
             sa.Column(
                 "pairing_code_expires_at", sa.TIMESTAMP(timezone=True), nullable=True
             ),
             sa.Column("external_peer_id", sa.Text(), nullable=True),
             sa.Column(
                 "external_peer_kind",
                 external_chat_peer_kind_enum,
                 nullable=False,
                 server_default="unknown",
             ),
             sa.Column(
                 "external_thread_id",
                 sa.Text(),
                 nullable=True,
                 comment="Reserved for Telegram message_thread_id when group/forum support lands.",
             ),
             sa.Column("external_display_name", sa.Text(), nullable=True),
             sa.Column("external_username", sa.Text(), nullable=True),
             sa.Column(
                 "external_metadata",
                 postgresql.JSONB(astext_type=sa.Text()),
                 nullable=False,
                 server_default=sa.text("'{}'::jsonb"),
             ),
             sa.Column("new_chat_thread_id", sa.Integer(), nullable=True),
             sa.Column("revoked_at", sa.TIMESTAMP(timezone=True), nullable=True),
             sa.Column("suspended_at", sa.TIMESTAMP(timezone=True), nullable=True),
             sa.Column("suspended_reason", sa.Text(), nullable=True),
             sa.Column(
                 "created_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             sa.Column(
                 "updated_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             sa.ForeignKeyConstraint(
                 ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
             ),
             sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
             sa.ForeignKeyConstraint(
                 ["search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
             ),
             sa.ForeignKeyConstraint(
                 ["new_chat_thread_id"], ["new_chat_threads.id"], ondelete="SET NULL"
             ),
         )
     # At most one bound/suspended binding per (account, external peer).
     op.create_index(
         "uq_external_chat_bindings_account_peer_active",
         "external_chat_bindings",
         ["account_id", "external_peer_id"],
         unique=True,
         postgresql_where=sa.text(
             "state IN ('bound', 'suspended') AND external_peer_id IS NOT NULL"
         ),
         if_not_exists=True,
     )
     # Pairing codes are unique among bindings that are still pending.
     op.create_index(
         "uq_external_chat_bindings_pairing_code_pending",
         "external_chat_bindings",
         ["pairing_code"],
         unique=True,
         postgresql_where=sa.text("state = 'pending'"),
         if_not_exists=True,
     )
     op.create_index(
         "ix_external_chat_bindings_user_state",
         "external_chat_bindings",
         ["user_id", "state"],
         if_not_exists=True,
     )
     op.create_index(
         "ix_external_chat_bindings_search_space_state",
         "external_chat_bindings",
         ["search_space_id", "state"],
         if_not_exists=True,
     )
     # --- external_chat_inbound_events ---------------------------------------
     if not _table_exists(conn, "external_chat_inbound_events"):
         op.create_table(
             "external_chat_inbound_events",
             sa.Column("id", sa.BigInteger(), primary_key=True),
             sa.Column("account_id", sa.BigInteger(), nullable=False),
             sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
             sa.Column("platform", external_chat_platform_enum, nullable=False),
             sa.Column("event_dedupe_key", sa.Text(), nullable=False),
             sa.Column("external_event_id", sa.Text(), nullable=True),
             sa.Column("external_message_id", sa.Text(), nullable=True),
             sa.Column("event_kind", external_chat_event_kind_enum, nullable=False),
             sa.Column(
                 "raw_payload",
                 postgresql.JSONB(astext_type=sa.Text()),
                 nullable=True,
             ),
             sa.Column("request_id", sa.String(64), nullable=True),
             sa.Column(
                 "status",
                 external_chat_event_status_enum,
                 nullable=False,
                 server_default="received",
             ),
             sa.Column(
                 "attempt_count", sa.Integer(), nullable=False, server_default="0"
             ),
             sa.Column("last_error", sa.Text(), nullable=True),
             sa.Column(
                 "received_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             sa.Column("processed_at", sa.TIMESTAMP(timezone=True), nullable=True),
             sa.Column(
                 "created_at",
                 sa.TIMESTAMP(timezone=True),
                 nullable=False,
                 server_default=sa.text("(now() AT TIME ZONE 'utc')"),
             ),
             sa.ForeignKeyConstraint(
                 ["account_id"], ["external_chat_accounts.id"], ondelete="CASCADE"
             ),
             sa.ForeignKeyConstraint(
                 ["external_chat_binding_id"],
                 ["external_chat_bindings.id"],
                 ondelete="SET NULL",
             ),
             # Dedupe key is unique per account.
             sa.UniqueConstraint(
                 "account_id",
                 "event_dedupe_key",
                 name="uq_external_chat_inbound_account_dedupe_key",
             ),
         )
     op.create_index(
         "ix_external_chat_inbound_status_received_at",
         "external_chat_inbound_events",
         ["status", "received_at"],
         if_not_exists=True,
     )
     op.create_index(
         "ix_external_chat_inbound_binding_received_at",
         "external_chat_inbound_events",
         ["external_chat_binding_id", "received_at"],
         if_not_exists=True,
     )
     op.create_index(
         "ix_external_chat_inbound_request_id",
         "external_chat_inbound_events",
         ["request_id"],
         postgresql_where=sa.text("request_id IS NOT NULL"),
         if_not_exists=True,
     )
     # --- new_chat_threads: source + binding link ----------------------------
     if not _column_exists(conn, "new_chat_threads", "source"):
         op.add_column(
             "new_chat_threads",
             sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
         )
     # Runs unconditionally so a pre-existing ``source`` column also ends up TEXT.
     op.alter_column("new_chat_threads", "source", type_=sa.Text())
     if not _column_exists(conn, "new_chat_threads", "external_chat_binding_id"):
         op.add_column(
             "new_chat_threads",
             sa.Column("external_chat_binding_id", sa.BigInteger(), nullable=True),
         )
     if not _constraint_exists(
         conn,
         "new_chat_threads",
         "fk_new_chat_threads_external_chat_external_chat_binding_id",
     ):
         op.create_foreign_key(
             "fk_new_chat_threads_external_chat_external_chat_binding_id",
             "new_chat_threads",
             "external_chat_bindings",
             ["external_chat_binding_id"],
             ["id"],
             ondelete="SET NULL",
         )
     op.create_index(
         "ix_new_chat_threads_source", "new_chat_threads", ["source"], if_not_exists=True
     )
     op.create_index(
         "ix_new_chat_threads_external_chat_binding_id",
         "new_chat_threads",
         ["external_chat_binding_id"],
         if_not_exists=True,
     )
     # --- new_chat_messages: source + platform metadata ----------------------
     if not _column_exists(conn, "new_chat_messages", "source"):
         op.add_column(
             "new_chat_messages",
             sa.Column("source", sa.Text(), nullable=False, server_default="surfsense"),
         )
     # Runs unconditionally so a pre-existing ``source`` column also ends up TEXT.
     op.alter_column("new_chat_messages", "source", type_=sa.Text())
     if not _column_exists(conn, "new_chat_messages", "platform_metadata"):
         op.add_column(
             "new_chat_messages",
             sa.Column(
                 "platform_metadata",
                 postgresql.JSONB(astext_type=sa.Text()),
                 nullable=True,
             ),
         )
     op.create_index(
         "ix_new_chat_messages_source",
         "new_chat_messages",
         ["source"],
         if_not_exists=True,
     )
     # An inbound platform message may be stored only once per thread.
     op.create_index(
         "uq_new_chat_messages_inbound_platform",
         "new_chat_messages",
         [
             "thread_id",
             sa.text("(platform_metadata->>'platform')"),
             sa.text("(platform_metadata->>'external_message_id')"),
         ],
         unique=True,
         postgresql_where=sa.text(
             "platform_metadata IS NOT NULL "
             "AND platform_metadata->>'direction' = 'inbound'"
         ),
         if_not_exists=True,
     )
     op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY FULL")
     # --- publication resync -------------------------------------------------
     exists = conn.execute(
         sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
         {"name": PUBLICATION_NAME},
     ).fetchone()
     if exists:
         documents_has_zero_ver = _has_zero_version(conn, "documents")
         user_has_zero_ver = _has_zero_version(conn, "user")
         # COMMENT / ALTER / COMMENT share one transaction (a SAVEPOINT when a
         # transaction is already open). The bookend labels carry this
         # migration's own revision number (149).
         tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
         with tx:
             conn.execute(
                 sa.text(
                     f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-external-chat'"
                 )
             )
             conn.execute(
                 sa.text(
                     _build_set_table_ddl(
                         documents_has_zero_ver=documents_has_zero_ver,
                         user_has_zero_ver=user_has_zero_ver,
                     )
                 )
             )
             conn.execute(
                 sa.text(
                     f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-external-chat'"
                 )
             )
 def downgrade() -> None:
     """Undo revision 149: resync the publication, then drop everything upgrade added.
     Order matters: the publication is re-emitted first, then the chat-table
     columns and indexes are removed, then the external chat tables are dropped
     child-first (inbound events, bindings, accounts), and finally the enum
     types. Every drop is guarded by an existence check.
     """
     conn = op.get_bind()
     exists = conn.execute(
         sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
         {"name": PUBLICATION_NAME},
     ).fetchone()
     if exists:
         documents_has_zero_ver = _has_zero_version(conn, "documents")
         user_has_zero_ver = _has_zero_version(conn, "user")
         # Restore the publication shape from migration 148.
         doc_cols = DOCUMENT_COLS + (['"_0_version"'] if documents_has_zero_ver else [])
         user_cols = USER_COLS + (['"_0_version"'] if user_has_zero_ver else [])
         ddl = (
             f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE "
             f"notifications, "
             f"documents ({_cols(doc_cols)}), "
             f"folders, "
             f"search_source_connectors, "
             f"new_chat_messages, "
             f"chat_comments, "
             f"chat_session_state, "
             f'"user" ({_cols(user_cols)}), '
             f"automation_runs ({_cols(AUTOMATION_RUN_COLS)})"
         )
         # The bookend labels carry this migration's own revision number (149).
         tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
         with tx:
             conn.execute(
                 sa.text(
                     f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-149-downgrade'"
                 )
             )
             conn.execute(sa.text(ddl))
             conn.execute(
                 sa.text(
                     f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-149-downgrade'"
                 )
             )
     # --- new_chat_messages --------------------------------------------------
     # Replica identity is reset only while the ``source`` column added by
     # upgrade() is still present.
     if _column_exists(conn, "new_chat_messages", "source"):
         op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY DEFAULT")
     _drop_index_if_exists("uq_new_chat_messages_inbound_platform", "new_chat_messages")
     _drop_index_if_exists("ix_new_chat_messages_source", "new_chat_messages")
     _drop_column_if_exists("new_chat_messages", "platform_metadata")
     _drop_column_if_exists("new_chat_messages", "source")
     # --- new_chat_threads ---------------------------------------------------
     _drop_index_if_exists(
         "ix_new_chat_threads_external_chat_binding_id", "new_chat_threads"
     )
     _drop_index_if_exists("ix_new_chat_threads_source", "new_chat_threads")
     # The foreign key is dropped before the column it is defined on.
     if _constraint_exists(
         conn,
         "new_chat_threads",
         "fk_new_chat_threads_external_chat_external_chat_binding_id",
     ):
         op.drop_constraint(
             "fk_new_chat_threads_external_chat_external_chat_binding_id",
             "new_chat_threads",
             type_="foreignkey",
         )
     _drop_column_if_exists("new_chat_threads", "external_chat_binding_id")
     _drop_column_if_exists("new_chat_threads", "source")
     # --- external_chat_inbound_events ---------------------------------------
     _drop_index_if_exists(
         "ix_external_chat_inbound_binding_received_at", "external_chat_inbound_events"
     )
     _drop_index_if_exists(
         "ix_external_chat_inbound_request_id", "external_chat_inbound_events"
     )
     _drop_index_if_exists(
         "ix_external_chat_inbound_status_received_at", "external_chat_inbound_events"
     )
     if _table_exists(conn, "external_chat_inbound_events"):
         op.drop_table("external_chat_inbound_events")
     # --- external_chat_bindings ---------------------------------------------
     _drop_index_if_exists(
         "ix_external_chat_bindings_search_space_state",
         "external_chat_bindings",
     )
     _drop_index_if_exists(
         "ix_external_chat_bindings_user_state", "external_chat_bindings"
     )
     _drop_index_if_exists(
         "uq_external_chat_bindings_pairing_code_pending",
         "external_chat_bindings",
     )
     _drop_index_if_exists(
         "uq_external_chat_bindings_account_peer_active",
         "external_chat_bindings",
     )
     if _table_exists(conn, "external_chat_bindings"):
         op.drop_table("external_chat_bindings")
     # --- external_chat_accounts ---------------------------------------------
     _drop_index_if_exists(
         "uq_external_chat_accounts_system_platform", "external_chat_accounts"
     )
     _drop_index_if_exists(
         "uq_external_chat_accounts_owner_platform", "external_chat_accounts"
     )
     _drop_index_if_exists(
         "uq_external_chat_accounts_webhook_secret", "external_chat_accounts"
     )
     if _table_exists(conn, "external_chat_accounts"):
         op.drop_table("external_chat_accounts")
     # --- enum types (dropped last, once no table uses them) -----------------
     for enum_name in (
         "external_chat_event_status",
         "external_chat_event_kind",
         "external_chat_peer_kind",
         "external_chat_binding_state",
         "external_chat_health_status",
         "external_chat_account_mode",
         "external_chat_platform",
     ):
         postgresql.ENUM(name=enum_name).drop(conn, checkfirst=True)
--- a/surfsense_backend/alembic/versions/150_add_slack_gateway_platform.py
+++ b/surfsense_backend/alembic/versions/150_add_slack_gateway_platform.py
@ -0,0 +1,102 @@
 """add slack gateway platform
 Revision ID: 150
 Revises: 149
 Create Date: 2026-05-31
 """
 from __future__ import annotations
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "150"
 down_revision: str | None = "149"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 def _enum_value_exists(enum_name: str, value: str) -> bool:
     """Return True when the visible enum type ``enum_name`` already has label ``value``.
     ``pg_type_is_visible`` restricts the match to the type an unqualified
     ``ALTER TYPE <enum_name>`` would resolve to, so a same-named enum in
     another schema cannot make the caller skip ``ADD VALUE``.
     Args:
         enum_name: Unqualified name of the PostgreSQL enum type.
         value: Enum label to look for.
     """
     conn = op.get_bind()
     row = conn.execute(
         sa.text(
             "SELECT 1 FROM pg_enum e "
             "JOIN pg_type t ON t.oid = e.enumtypid "
             "WHERE t.typname = :enum_name AND e.enumlabel = :value "
             "AND pg_type_is_visible(t.oid)"
         ),
         {"enum_name": enum_name, "value": value},
     ).fetchone()
     return row is not None
 def _index_exists(index_name: str) -> bool:
     """Return True when an index named ``index_name`` exists in the current schema."""
     lookup = sa.text(
         "SELECT 1 FROM pg_indexes "
         "WHERE schemaname = current_schema() AND indexname = :index_name"
     )
     row = op.get_bind().execute(lookup, {"index_name": index_name}).fetchone()
     return row is not None
 def upgrade() -> None:
     """Add the ``slack`` platform and make system-account uniqueness team-aware.
     The system-platform unique index is rebuilt to cover only system accounts
     whose ``cursor_state`` has no ``team_id`` key; system accounts that do
     carry a ``team_id`` are made unique per ``(platform, team_id)`` instead.
     """
     accounts_table = "external_chat_accounts"
     system_platform_index = "uq_external_chat_accounts_system_platform"
     slack_team_index = "uq_external_chat_accounts_slack_team"
     slack_missing = not _enum_value_exists("external_chat_platform", "slack")
     if slack_missing:
         op.execute("ALTER TYPE external_chat_platform ADD VALUE 'slack'")
     # Drop and recreate: the index keeps its name but gets a narrower predicate.
     if _index_exists(system_platform_index):
         op.drop_index(system_platform_index, table_name=accounts_table)
     without_team = sa.text(
         "is_system_account = true AND NOT (cursor_state ? 'team_id')"
     )
     op.create_index(
         system_platform_index,
         accounts_table,
         ["platform"],
         unique=True,
         postgresql_where=without_team,
         if_not_exists=True,
     )
     with_team = sa.text("is_system_account = true AND cursor_state ? 'team_id'")
     op.create_index(
         slack_team_index,
         accounts_table,
         ["platform", sa.text("(cursor_state ->> 'team_id')")],
         unique=True,
         postgresql_where=with_team,
         if_not_exists=True,
     )
 def downgrade() -> None:
     """Drop the team-aware indexes and restore the one-system-account-per-platform index.
     NOTE(review): recreating the unique index on ``platform`` for all system
     accounts will presumably fail if several system accounts share a platform
     (which the upgraded indexes allow when ``team_id`` differs) -- confirm how
     such rows should be handled before downgrading.
     """
     accounts_table = "external_chat_accounts"
     system_platform_index = "uq_external_chat_accounts_system_platform"
     for index_name in (
         "uq_external_chat_accounts_slack_team",
         system_platform_index,
     ):
         if _index_exists(index_name):
             op.drop_index(index_name, table_name=accounts_table)
     all_system_accounts = sa.text("is_system_account = true")
     op.create_index(
         system_platform_index,
         accounts_table,
         ["platform"],
         unique=True,
         postgresql_where=all_system_accounts,
         if_not_exists=True,
     )
     # The 'slack' enum value is deliberately left in place on downgrade.
--- a/surfsense_backend/alembic/versions/151_add_discord_gateway_platform.py
+++ b/surfsense_backend/alembic/versions/151_add_discord_gateway_platform.py
@ -0,0 +1,106 @@
 """add discord gateway platform
 Revision ID: 151
 Revises: 150
 Create Date: 2026-06-01
 """
 from __future__ import annotations
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "151"
 down_revision: str | None = "150"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 def _enum_value_exists(enum_name: str, value: str) -> bool:
    """Report whether enum type ``enum_name`` already has the label ``value``.

    The lookup joins ``pg_enum`` to ``pg_type`` on the migration's current
    connection and returns ``True`` when at least one row matches.
    """
    bind = op.get_bind()
    lookup = sa.text(
        "SELECT 1 FROM pg_enum e "
        "JOIN pg_type t ON t.oid = e.enumtypid "
        "WHERE t.typname = :enum_name AND e.enumlabel = :value"
    )
    params = {"enum_name": enum_name, "value": value}
    first_row = bind.execute(lookup, params).fetchone()
    return first_row is not None
 def _index_exists(index_name: str) -> bool:
    """Report whether an index called ``index_name`` exists in the current schema.

    Queries ``pg_indexes`` restricted to ``current_schema()`` on the
    migration's current connection.
    """
    bind = op.get_bind()
    lookup = sa.text(
        "SELECT 1 FROM pg_indexes "
        "WHERE schemaname = current_schema() AND indexname = :index_name"
    )
    first_row = bind.execute(lookup, {"index_name": index_name}).fetchone()
    return first_row is not None
 def upgrade() -> None:
    """Register the ``discord`` platform and add per-guild uniqueness.

    Steps, in order:
      1. Add ``'discord'`` to the ``external_chat_platform`` enum unless the
         label is already present.
      2. Drop ``uq_external_chat_accounts_system_platform`` if it exists and
         recreate it so it only covers system accounts whose ``cursor_state``
         has neither a ``team_id`` nor a ``guild_id`` key.
      3. Create ``uq_external_chat_accounts_discord_guild``, unique on
         ``(platform, cursor_state ->> 'guild_id')`` for system accounts whose
         ``cursor_state`` has a ``guild_id`` key.
    """
    accounts_table = "external_chat_accounts"
    system_index = "uq_external_chat_accounts_system_platform"

    discord_known = _enum_value_exists("external_chat_platform", "discord")
    if not discord_known:
        op.execute("ALTER TYPE external_chat_platform ADD VALUE 'discord'")

    # Drop-then-create so the index is rebuilt with the narrower predicate.
    if _index_exists(system_index):
        op.drop_index(system_index, table_name=accounts_table)

    unscoped_predicate = sa.text(
        "is_system_account = true "
        "AND NOT (cursor_state ? 'team_id') "
        "AND NOT (cursor_state ? 'guild_id')"
    )
    op.create_index(
        system_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=unscoped_predicate,
        if_not_exists=True,
    )

    guild_id_expr = sa.text("(cursor_state ->> 'guild_id')")
    has_guild_predicate = sa.text(
        "is_system_account = true AND cursor_state ? 'guild_id'"
    )
    op.create_index(
        "uq_external_chat_accounts_discord_guild",
        accounts_table,
        ["platform", guild_id_expr],
        unique=True,
        postgresql_where=has_guild_predicate,
        if_not_exists=True,
    )
 def downgrade() -> None:
    """Remove the Discord guild index and restore the revision-150 system index.

    Drops ``uq_external_chat_accounts_discord_guild`` and
    ``uq_external_chat_accounts_system_platform`` when they exist, then
    recreates the latter with the predicate that only excludes rows carrying
    a ``team_id`` key.
    """
    accounts_table = "external_chat_accounts"
    system_index = "uq_external_chat_accounts_system_platform"

    for stale_index in ("uq_external_chat_accounts_discord_guild", system_index):
        if _index_exists(stale_index):
            op.drop_index(stale_index, table_name=accounts_table)

    no_team_predicate = sa.text(
        "is_system_account = true AND NOT (cursor_state ? 'team_id')"
    )
    op.create_index(
        system_index,
        accounts_table,
        ["platform"],
        unique=True,
        postgresql_where=no_team_predicate,
        if_not_exists=True,
    )
    # PostgreSQL enum values are intentionally not removed on downgrade.
--- a/surfsense_backend/alembic/versions/152_add_document_files.py
+++ b/surfsense_backend/alembic/versions/152_add_document_files.py
@ -0,0 +1,85 @@
 """add document_files table for stored original uploads
 Revision ID: 152
 Revises: 151
 """
 from collections.abc import Sequence
 from alembic import op
 revision: str = "152"
 down_revision: str | None = "151"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
    """Create the ``document_file_kind`` enum, the ``document_files`` table and its indexes.

    Every statement carries an existence guard (``IF NOT EXISTS``), and the
    statements run in this order: enum type, table, then one single-column
    index each for ``document_id``, ``search_space_id``, ``kind``,
    ``created_by_id`` and ``created_at``.
    """
    # The enum type must precede the table that references it.
    create_enum_sql = """
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type WHERE typname = 'document_file_kind'
            ) THEN
                CREATE TYPE document_file_kind AS ENUM (
                    'ORIGINAL', 'REDACTED', 'FILLED_FORM'
                );
            END IF;
        END
        $$;
        """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS document_files (
            id SERIAL PRIMARY KEY,
            document_id INTEGER NOT NULL
                REFERENCES documents(id) ON DELETE CASCADE,
            search_space_id INTEGER NOT NULL
                REFERENCES searchspaces(id) ON DELETE CASCADE,
            kind document_file_kind NOT NULL DEFAULT 'ORIGINAL',
            storage_backend VARCHAR(32) NOT NULL,
            storage_key TEXT NOT NULL,
            original_filename TEXT NOT NULL,
            mime_type TEXT,
            size_bytes BIGINT NOT NULL,
            checksum_sha256 VARCHAR(64),
            created_by_id UUID
                REFERENCES "user"(id) ON DELETE SET NULL,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
        );
        """
    op.execute(create_enum_sql)
    op.execute(create_table_sql)

    indexed_columns = (
        "document_id",
        "search_space_id",
        "kind",
        "created_by_id",
        "created_at",
    )
    for column in indexed_columns:
        op.execute(
            f"CREATE INDEX IF NOT EXISTS ix_document_files_{column} "
            f"ON document_files({column});"
        )
 def downgrade() -> None:
    """Drop the ``document_files`` indexes, table and enum type, in that order.

    Indexes are removed in the reverse of their creation order. Every
    statement uses ``IF EXISTS``.
    """
    reverse_indexed_columns = (
        "created_at",
        "created_by_id",
        "kind",
        "search_space_id",
        "document_id",
    )
    for column in reverse_indexed_columns:
        op.execute(f"DROP INDEX IF EXISTS ix_document_files_{column};")
    op.execute("DROP TABLE IF EXISTS document_files;")
    op.execute("DROP TYPE IF EXISTS document_file_kind;")
--- a/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
+++ b/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
@ -0,0 +1,121 @@
 """restore automation_runs to zero_publication
 Migration 149's ``SET TABLE`` dropped ``automation_runs`` (added in 148),
 breaking the dashboard live run ticker with a SchemaVersionNotSupported
 reload loop. Re-emit the publication with ``automation_runs`` using the
 ``COMMENT`` bookend pattern so zero-cache fires its schema-change hook.
 Revision ID: 153
 Revises: 152
 """
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "153"
 down_revision: str | None = "152"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 PUBLICATION_NAME = "zero_publication"
 DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
 ]
 USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
 ]
 AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
 ]
 def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` in the current schema has a ``_0_version`` column.

    Args:
        conn: Connection used to run the lookup (the migration bind).
        table: Unquoted table name to inspect.

    Returns:
        ``True`` when ``information_schema.columns`` lists a ``_0_version``
        column for ``table`` in ``current_schema()``.

    The lookup is scoped to ``current_schema()`` so a same-named table in
    another schema cannot produce a false positive; a false positive would
    put a non-existent column into the publication column list.
    """
    return (
        conn.execute(
            sa.text(
                "SELECT 1 FROM information_schema.columns "
                "WHERE table_schema = current_schema() "
                "AND table_name = :tbl AND column_name = '_0_version'"
            ),
            {"tbl": table},
        ).fetchone()
        is not None
    )
 def _set_table_ddl(*, with_automation_runs: bool, conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents`` and ``"user"`` are published with explicit column lists;
    ``"_0_version"`` is appended to a list only when the table has that
    column. ``automation_runs`` (with its column list) is included only when
    ``with_automation_runs`` is true.
    """

    def column_list(base_columns, table):
        # Copy so the module-level column constants are never mutated.
        columns = list(base_columns)
        if _has_zero_version(conn, table):
            columns.append('"_0_version"')
        return ", ".join(columns)

    documents_columns = column_list(DOCUMENT_COLS, "documents")
    user_columns = column_list(USER_COLS, "user")

    published = [
        "notifications",
        f"documents ({documents_columns})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({user_columns})',
    ]
    if with_automation_runs:
        run_columns = ", ".join(AUTOMATION_RUN_COLS)
        published.append(f"automation_runs ({run_columns})")

    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
 def _resync(*, with_automation_runs: bool, tag: str) -> None:
    """Re-emit the publication table list between two ``COMMENT`` statements.

    Does nothing when the publication does not exist. Otherwise, inside a
    savepoint (if the connection is already in a transaction) or a fresh
    transaction, it sets the publication comment to ``pre-<tag>``, runs the
    ``SET TABLE`` statement, then sets the comment to ``post-<tag>``.
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    with scope:
        bind.execute(
            sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'")
        )
        set_table_sql = _set_table_ddl(
            with_automation_runs=with_automation_runs, conn=bind
        )
        bind.execute(sa.text(set_table_sql))
        bind.execute(
            sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'")
        )
 def upgrade() -> None:
    """Re-emit the publication with ``automation_runs`` included."""
    _resync(tag="153-resync", with_automation_runs=True)
 def downgrade() -> None:
    """Re-emit the publication without ``automation_runs``."""
    _resync(tag="153-downgrade", with_automation_runs=False)
--- a/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
+++ b/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
@ -0,0 +1,147 @@
 """remove document summary llm settings
 Revision ID: 154
 Revises: 153
 """
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "154"
 down_revision: str | None = "153"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 PUBLICATION_NAME = "zero_publication"
 DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
 ]
 USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
 ]
 AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
 ]
 def _column_exists(conn, table: str, column: str) -> bool:
    """Report whether ``table`` in the current schema has a column named ``column``.

    Args:
        conn: Connection used to run the lookup (the migration bind).
        table: Unquoted table name to inspect.
        column: Column name to look for.

    Returns:
        ``True`` when ``information_schema.columns`` lists ``column`` for
        ``table`` in ``current_schema()``.

    The lookup is scoped to ``current_schema()`` so a same-named table in
    another schema cannot produce a false positive. Callers use the result
    to guard ``drop_column`` / ``add_column``, which would otherwise fail or
    be skipped based on an unrelated table.
    """
    return (
        conn.execute(
            sa.text(
                "SELECT 1 FROM information_schema.columns "
                "WHERE table_schema = current_schema() "
                "AND table_name = :table AND column_name = :column"
            ),
            {"table": table, "column": column},
        ).fetchone()
        is not None
    )
 def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` has a ``_0_version`` column."""
    version_column = "_0_version"
    return _column_exists(conn, table, version_column)
 def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement for the publication.

    ``documents``, ``"user"`` and ``automation_runs`` are published with
    explicit column lists; ``"_0_version"`` is appended to the ``documents``
    and ``"user"`` lists only when the table has that column.
    """

    def column_list(base_columns, table):
        # Copy so the module-level column constants are never mutated.
        columns = list(base_columns)
        if _has_zero_version(conn, table):
            columns.append('"_0_version"')
        return ", ".join(columns)

    documents_columns = column_list(DOCUMENT_COLS, "documents")
    user_columns = column_list(USER_COLS, "user")
    run_columns = ", ".join(AUTOMATION_RUN_COLS)

    published = [
        "notifications",
        f"documents ({documents_columns})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        f'"user" ({user_columns})',
        f"automation_runs ({run_columns})",
    ]
    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
 def _resync_zero_publication(tag: str) -> None:
    """Re-emit the publication table list between two ``COMMENT`` statements.

    Does nothing when the publication does not exist. Otherwise, inside a
    savepoint (if the connection is already in a transaction) or a fresh
    transaction, it sets the publication comment to ``pre-<tag>``, runs the
    ``SET TABLE`` statement, then sets the comment to ``post-<tag>``.
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    with scope:
        bind.execute(
            sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'")
        )
        set_table_sql = _set_table_ddl(bind)
        bind.execute(sa.text(set_table_sql))
        bind.execute(
            sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'")
        )
 def upgrade() -> None:
    """Drop the document-summary columns (when present) and resync the publication.

    Removes ``searchspaces.document_summary_llm_id`` and
    ``search_source_connectors.enable_summary``, each only if the column
    currently exists, then re-emits the publication.
    """
    bind = op.get_bind()
    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    for table_name, column_name in obsolete_columns:
        if _column_exists(bind, table_name, column_name):
            op.drop_column(table_name, column_name)
    _resync_zero_publication("154-summary-removal")
 def downgrade() -> None:
    """Re-add the document-summary columns (when absent) and resync the publication.

    ``searchspaces.document_summary_llm_id`` comes back as a nullable integer
    with server default ``"0"``; ``search_source_connectors.enable_summary``
    comes back as a non-null boolean with server default ``false``.
    """
    bind = op.get_bind()

    llm_column_missing = not _column_exists(
        bind, "searchspaces", "document_summary_llm_id"
    )
    if llm_column_missing:
        llm_column = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", llm_column)

    summary_flag_missing = not _column_exists(
        bind, "search_source_connectors", "enable_summary"
    )
    if summary_flag_missing:
        summary_flag = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", summary_flag)

    _resync_zero_publication("154-summary-removal-downgrade")
--- a/surfsense_backend/alembic/versions/155_reconcile_zero_publication.py
+++ b/surfsense_backend/alembic/versions/155_reconcile_zero_publication.py
@ -0,0 +1,23 @@
 """reconcile zero_publication from canonical definition
 Revision ID: 155
 Revises: 154
 """
 from collections.abc import Sequence
 from alembic import op
 from app.zero_publication import apply_publication
 revision: str = "155"
 down_revision: str | None = "154"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
    """Apply the canonical publication definition on the migration connection."""
    bind = op.get_bind()
    apply_publication(bind)
 def downgrade() -> None:
    """Intentionally a no-op: historical publication shapes are immutable."""
    return None
--- a/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
+++ b/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
@ -1,557 +0,0 @@
 """Vision autocomplete agent with scoped filesystem exploration.
 Converts the stateless single-shot vision autocomplete into an agent that
 seeds a virtual filesystem from KB search results and lets the vision LLM
 explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
 before generating the final completion.
 Performance: KB search and agent graph compilation run in parallel so
 the only sequential latency is KB-search (or agent compile, whichever is
 slower) + the agent's LLM turns.  There is no separate "query extraction"
 LLM call — the window title is used directly as the KB search query.
 """
 from __future__ import annotations
 import asyncio
 import json
 import logging
 import re
 import uuid
 from collections.abc import AsyncGenerator
 from typing import Any
 from deepagents.graph import BASE_AGENT_PROMPT
 from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
 from langchain.agents import create_agent
 from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import AIMessage, ToolMessage
 from app.agents.new_chat.document_xml import build_document_xml
 from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
 from app.agents.new_chat.middleware.knowledge_search import (
    search_knowledge_base,
 )
 from app.agents.new_chat.path_resolver import (
    DOCUMENTS_ROOT,
    build_path_index,
    doc_to_virtual_path,
 )
 from app.db import shielded_async_session
 from app.services.new_streaming_service import VercelStreamingService
 # Prefer the library helper; if importing it fails for any reason, fall back
 # to a minimal local stand-in so this module still imports.
 try:
    from deepagents.backends.utils import create_file_data
 except Exception:  # pragma: no cover - defensive
    def create_file_data(content: str) -> dict[str, Any]:
        """Fallback: wrap ``content`` as ``{"content": [<lines>]}``, split on ``\\n``."""
        return {"content": content.split("\n")}
 async def _build_autocomplete_filesystem(
    *,
    documents: Any,
    search_space_id: int,
 ) -> tuple[dict[str, Any], dict[int, str]]:
    """Turn KB search hits into a ``state['files']``-shaped mapping.

    Each usable hit (a dict whose ``"document"`` metadata has an integer
    ``id``) is rendered to XML and stored under the virtual path produced by
    the canonical path resolver; other hits are skipped.

    Returns:
        ``(files, doc_id_to_path)``. Both are empty when ``documents`` is
        falsy. When hits were supplied but none was usable, ``files`` holds
        a single empty placeholder entry under ``DOCUMENTS_ROOT``.
    """
    virtual_files: dict[str, Any] = {}
    path_by_doc_id: dict[int, str] = {}
    if not documents:
        return virtual_files, path_by_doc_id

    async with shielded_async_session() as session:
        path_index = await build_path_index(session, search_space_id)

    for hit in documents:
        if not isinstance(hit, dict):
            continue
        doc_meta = hit.get("document") or {}
        doc_id = doc_meta.get("id")
        if not isinstance(doc_id, int):
            continue

        virtual_path = doc_to_virtual_path(
            doc_id=doc_id,
            title=str(doc_meta.get("title") or "untitled"),
            folder_id=doc_meta.get("folder_id"),
            index=path_index,
        )

        raw_chunk_ids = hit.get("matched_chunk_ids") or []
        try:
            matched_ids = {int(chunk_id) for chunk_id in raw_chunk_ids}
        except (TypeError, ValueError):
            # Unusable chunk ids: render the document with nothing matched.
            matched_ids = set()

        rendered = build_document_xml(hit, matched_chunk_ids=matched_ids)
        virtual_files[virtual_path] = create_file_data(rendered)
        path_by_doc_id[doc_id] = virtual_path

    if not virtual_files:
        # Ensure the synthetic /documents folder is visible even when empty.
        placeholder_path = f"{DOCUMENTS_ROOT}/.placeholder"
        virtual_files[placeholder_path] = create_file_data("")

    return virtual_files, path_by_doc_id
 logger = logging.getLogger(__name__)
 KB_TOP_K = 10
 # ---------------------------------------------------------------------------
 # System prompt
 # ---------------------------------------------------------------------------
 AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
 You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
 Your job:
 1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
 2. Identify the text area where the user will type.
 3. Generate the text the user most likely wants to write based on the visual context.
 You also have access to the user's knowledge base documents via filesystem tools. However:
 - ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
 - Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
 - If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
 - Keep your output SHORT — autocomplete should feel like a natural continuation, not an essay.
 Key behavior:
 - If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
 - If the text area already has text, continue it naturally — typically just a sentence or two.
 Rules:
 - Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
 - Match the tone and formality of the surrounding context.
 - If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
 - Do NOT describe the screenshot or explain your reasoning.
 - Do NOT cite or reference documents explicitly — just let the knowledge inform your writing naturally.
 - If you cannot determine what to write, output an empty JSON array: []
 ## Output Format
 You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion — vary the tone, detail level, or angle.
 Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else — no markdown fences, no explanation, no commentary.
 Example format:
 ["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
 ## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
 All file paths must start with a `/`.
 - ls: list files and directories at a given path.
 - read_file: read a file from the filesystem.
 - write_file: create a temporary file in the session (not persisted).
 - edit_file: edit a file in the session (not persisted for /documents/ files).
 - glob: find files matching a pattern (e.g., "**/*.xml").
 - grep: search for text within files.
 ## When to Use Filesystem Tools
 BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it — do NOT explore the KB.
 Only use tools when:
 - The user is clearly writing about a specific topic that likely has detailed information in their KB.
 - You need a specific fact, name, number, or reference that the screenshot doesn't provide.
 When you do use tools, be surgical:
 - Check the `ls` output first. If no document title looks relevant, stop — do not read files just to see what's there.
 - If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
 - Extract only the specific information you need and move on to generating the completion.
 ## Reading Documents Efficiently
 Documents are formatted as XML. Each document contains:
 - `<document_metadata>` — title, type, URL, etc.
 - `<chunk_index>` — a table of every chunk with its **line range** and a
  `matched="true"` flag for chunks that matched the search query.
 - `<document_content>` — the actual chunks in original document order.
 **Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
 chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
 limit=<lines>)` to jump directly to those sections."""
 APP_CONTEXT_BLOCK = """
 The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
 def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
    """Return the base system prompt, plus the app-context block when ``app_name`` is set."""
    if not app_name:
        return AUTOCOMPLETE_SYSTEM_PROMPT
    app_context = APP_CONTEXT_BLOCK.format(
        app_name=app_name, window_title=window_title
    )
    return AUTOCOMPLETE_SYSTEM_PROMPT + app_context
 # ---------------------------------------------------------------------------
 # Pre-compute KB filesystem (runs in parallel with agent compilation)
 # ---------------------------------------------------------------------------
 class _KBResult:
    """Holder for the pre-computed knowledge-base state handed to the agent.

    All three fields default to ``None``; a default-constructed instance
    therefore reports ``has_documents == False``.
    """
    __slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
    def __init__(
        self,
        files: dict[str, Any] | None = None,
        ls_ai_msg: AIMessage | None = None,
        ls_tool_msg: ToolMessage | None = None,
    ) -> None:
        # files: virtual path -> file payload; the two messages are the
        # synthetic ``ls`` call and its result built alongside the files.
        self.files, self.ls_ai_msg, self.ls_tool_msg = files, ls_ai_msg, ls_tool_msg
    @property
    def has_documents(self) -> bool:
        """``True`` when ``files`` is a non-empty mapping."""
        return True if self.files else False
 async def precompute_kb_filesystem(
    search_space_id: int,
    query: str,
    top_k: int = KB_TOP_K,
 ) -> _KBResult:
    """Search the KB and assemble the scoped filesystem ahead of the agent run.

    Meant to be awaited concurrently with agent graph compilation (for
    example via ``asyncio.gather``).

    Returns:
        A populated ``_KBResult`` carrying the files plus a synthetic ``ls``
        tool call and its result listing the ``/documents/`` paths. An empty
        ``_KBResult`` is returned when ``query`` is empty, the search yields
        nothing, no files are produced, or any exception occurs (the failure
        is logged as a warning and swallowed).
    """
    if not query:
        return _KBResult()

    try:
        hits = await search_knowledge_base(
            query=query,
            search_space_id=search_space_id,
            top_k=top_k,
        )
        if not hits:
            return _KBResult()

        kb_files, _doc_paths_by_id = await _build_autocomplete_filesystem(
            documents=hits,
            search_space_id=search_space_id,
        )
        if not kb_files:
            return _KBResult()

        listing = [
            file_path
            for file_path, file_data in kb_files.items()
            if file_path.startswith("/documents/") and file_data is not None
        ]

        # Fabricate an ``ls /documents`` exchange so the listing can be
        # injected into the conversation without a real tool round-trip.
        call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
        ls_call = AIMessage(
            content="",
            tool_calls=[{"name": "ls", "args": {"path": "/documents"}, "id": call_id}],
        )
        listing_text = str(listing) if listing else "No documents found."
        ls_result = ToolMessage(content=listing_text, tool_call_id=call_id)

        return _KBResult(files=kb_files, ls_ai_msg=ls_call, ls_tool_msg=ls_result)
    except Exception:
        logger.warning(
            "KB pre-computation failed, proceeding without KB", exc_info=True
        )
        return _KBResult()
 # ---------------------------------------------------------------------------
 # Filesystem middleware — no save_document, no persistence
 # ---------------------------------------------------------------------------
 class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
    """Filesystem middleware variant used by the autocomplete agent.

    It constructs the parent with ``search_space_id=None`` and
    ``created_by_id=None``. The intent is to skip the parent's persistence
    pipeline so autocomplete only reads and never commits to Postgres; that
    behaviour lives in the parent class, which is not visible here.
    """
    def __init__(self) -> None:
        """Initialise the parent middleware with no search space and no author."""
        super().__init__(created_by_id=None, search_space_id=None)
 # ---------------------------------------------------------------------------
 # Agent factory
 # ---------------------------------------------------------------------------
 async def _compile_agent(
    llm: BaseChatModel,
    app_name: str,
    window_title: str,
 ) -> Any:
    """Compile the autocomplete agent graph in a worker thread.

    The system prompt is the autocomplete prompt (with optional app context)
    followed by ``BASE_AGENT_PROMPT``. The agent is created with no explicit
    tools and three middlewares, then returned with ``recursion_limit`` set
    to 200.
    """
    combined_prompt = "\n\n".join(
        (
            _build_autocomplete_system_prompt(app_name, window_title),
            BASE_AGENT_PROMPT,
        )
    )
    compiled = await asyncio.to_thread(
        create_agent,
        llm,
        system_prompt=combined_prompt,
        tools=[],
        middleware=[
            AutocompleteFilesystemMiddleware(),
            PatchToolCallsMiddleware(),
            AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        ],
    )
    return compiled.with_config({"recursion_limit": 200})
 async def create_autocomplete_agent(
    llm: BaseChatModel,
    *,
    search_space_id: int,
    kb_query: str,
    app_name: str = "",
    window_title: str = "",
 ) -> tuple[Any, _KBResult]:
    """Compile the agent and pre-compute the KB filesystem concurrently.

    Returns:
        ``(agent, kb_result)``; the caller can seed the agent's initial state
        with the pre-computed filesystem.
    """
    compile_coro = _compile_agent(llm, app_name, window_title)
    kb_coro = precompute_kb_filesystem(search_space_id, kb_query)
    agent, kb = await asyncio.gather(compile_coro, kb_coro)
    return agent, kb
 # ---------------------------------------------------------------------------
 # JSON suggestion parsing (with fallback)
 # ---------------------------------------------------------------------------
 def _parse_suggestions(raw: str) -> list[str]:
    """Turn the agent's raw output into a list of suggestion strings.

    Candidate JSON strings are tried in the order ``_json_candidates``
    supplies them. The first one that decodes to a list made up only of
    strings wins, and its blank entries are dropped. If no candidate
    qualifies, the stripped raw text is returned as a single suggestion.
    Empty or whitespace-only input yields ``[]``.
    """
    stripped = raw.strip()
    if not stripped:
        return []

    for blob in _json_candidates(stripped):
        try:
            decoded = json.loads(blob)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if not isinstance(decoded, list):
            continue
        if any(not isinstance(entry, str) for entry in decoded):
            continue
        return [entry for entry in decoded if entry.strip()]

    return [stripped]
 def _json_candidates(text: str) -> list[str]:
    """Yield candidate JSON strings from raw text."""
    candidates = [text]
    fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
    if fence:
        candidates.append(fence.group(1).strip())
    bracket = re.search(r"\[.*]", text, re.DOTALL)
    if bracket:
        candidates.append(bracket.group(0))
    return candidates
 # ---------------------------------------------------------------------------
 # Streaming helper
 # ---------------------------------------------------------------------------
 async def stream_autocomplete_agent(
    agent: Any,
    input_data: dict[str, Any],
    streaming_service: VercelStreamingService,
    *,
    emit_message_start: bool = True,
 ) -> AsyncGenerator[str, None]:
    """Stream agent events as Vercel SSE, with thinking steps for tool calls.

    When ``emit_message_start`` is False the caller has already sent the
    ``message_start`` event (e.g. to show preparation steps before the agent
    runs).

    Model text is buffered rather than forwarded; after the event stream ends
    the buffer is parsed into suggestions and emitted as one ``suggestions``
    data event, followed by the finish and done events. Any exception raised
    while streaming is logged and reported as a generic error event followed
    by the done event.
    """
    # A fresh thread id per call, passed to the agent via ``configurable``.
    thread_id = uuid.uuid4().hex
    config = {"configurable": {"thread_id": thread_id}}
    # Accumulated model text; parsed into suggestions once streaming ends.
    text_buffer: list[str] = []
    # Number of tools currently running; model events are ignored while > 0.
    active_tool_depth = 0
    thinking_step_counter = 0
    # Tool run_id -> thinking-step id, used to close the right step on tool end.
    tool_step_ids: dict[str, str] = {}
    step_titles: dict[str, str] = {}
    completed_step_ids: set[str] = set()
    last_active_step_id: str | None = None
    def next_thinking_step_id() -> str:
        """Return the next sequential ``autocomplete-step-N`` identifier."""
        nonlocal thinking_step_counter
        thinking_step_counter += 1
        return f"autocomplete-step-{thinking_step_counter}"
    def complete_current_step() -> str | None:
        """Mark the last active step complete and return its event, or ``None`` if nothing is open."""
        nonlocal last_active_step_id
        if last_active_step_id and last_active_step_id not in completed_step_ids:
            completed_step_ids.add(last_active_step_id)
            title = step_titles.get(last_active_step_id, "Done")
            event = streaming_service.format_thinking_step(
                step_id=last_active_step_id,
                title=title,
                status="complete",
            )
            last_active_step_id = None
            return event
        return None
    if emit_message_start:
        yield streaming_service.format_message_start()
    # Open an initial "Generating suggestions" step before any agent event.
    gen_step_id = next_thinking_step_id()
    last_active_step_id = gen_step_id
    step_titles[gen_step_id] = "Generating suggestions"
    yield streaming_service.format_thinking_step(
        step_id=gen_step_id,
        title="Generating suggestions",
        status="in_progress",
    )
    try:
        async for event in agent.astream_events(
            input_data, config=config, version="v2"
        ):
            event_type = event.get("event", "")
            if event_type == "on_chat_model_stream":
                # Skip model output produced while a tool is running or
                # tagged as internal.
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                chunk = event.get("data", {}).get("chunk")
                if chunk and hasattr(chunk, "content"):
                    content = chunk.content
                    # Only plain-string content is buffered; other content
                    # shapes are ignored here.
                    # NOTE(review): chunks are buffered before it is known
                    # whether this turn ends in tool calls — confirm that
                    # pre-tool text cannot pollute the final parse.
                    if content and isinstance(content, str):
                        text_buffer.append(content)
            elif event_type == "on_chat_model_end":
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                output = event.get("data", {}).get("output")
                if output and hasattr(output, "content"):
                    # A turn that requests tools is not a final answer.
                    if getattr(output, "tool_calls", None):
                        continue
                    content = output.content
                    # Fallback: use the final message only when no streamed
                    # chunk was captured.
                    if content and isinstance(content, str) and not text_buffer:
                        text_buffer.append(content)
            elif event_type == "on_tool_start":
                active_tool_depth += 1
                tool_name = event.get("name", "unknown_tool")
                run_id = event.get("run_id", "")
                tool_input = event.get("data", {}).get("input", {})
                # Close whichever step is still open before starting the
                # tool's own step.
                step_event = complete_current_step()
                if step_event:
                    yield step_event
                tool_step_id = next_thinking_step_id()
                tool_step_ids[run_id] = tool_step_id
                last_active_step_id = tool_step_id
                title, items = _describe_tool_call(tool_name, tool_input)
                step_titles[tool_step_id] = title
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
                    title=title,
                    status="in_progress",
                    items=items,
                )
            elif event_type == "on_tool_end":
                # Clamp at zero so an unmatched end event cannot go negative.
                active_tool_depth = max(0, active_tool_depth - 1)
                run_id = event.get("run_id", "")
                step_id = tool_step_ids.pop(run_id, None)
                if step_id and step_id not in completed_step_ids:
                    completed_step_ids.add(step_id)
                    title = step_titles.get(step_id, "Done")
                    yield streaming_service.format_thinking_step(
                        step_id=step_id,
                        title=title,
                        status="complete",
                    )
                    if last_active_step_id == step_id:
                        last_active_step_id = None
        # Stream exhausted: close any step still open, then emit the result.
        step_event = complete_current_step()
        if step_event:
            yield step_event
        raw_text = "".join(text_buffer)
        suggestions = _parse_suggestions(raw_text)
        yield streaming_service.format_data("suggestions", {"options": suggestions})
        yield streaming_service.format_finish()
        yield streaming_service.format_done()
    except Exception as e:
        # Log the details server-side; the client only sees a generic message.
        logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
        yield streaming_service.format_error("Autocomplete failed. Please try again.")
        yield streaming_service.format_done()
 def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
    """Return a human-readable (title, items) for a tool call thinking step."""
    inp = tool_input if isinstance(tool_input, dict) else {}
    if tool_name == "ls":
        path = inp.get("path", "/")
        return "Listing files", [path]
    if tool_name == "read_file":
        fp = inp.get("file_path", "")
        display = fp if len(fp) <= 80 else "…" + fp[-77:]
        return "Reading file", [display]
    if tool_name == "write_file":
        fp = inp.get("file_path", "")
        display = fp if len(fp) <= 80 else "…" + fp[-77:]
        return "Writing file", [display]
    if tool_name == "edit_file":
        fp = inp.get("file_path", "")
        display = fp if len(fp) <= 80 else "…" + fp[-77:]
        return "Editing file", [display]
    if tool_name == "glob":
        pat = inp.get("pattern", "")
        base = inp.get("path", "/")
        return "Searching files", [f"{pat} in {base}"]
    if tool_name == "grep":
        pat = inp.get("pattern", "")
        path = inp.get("path", "")
        display_pat = pat[:60] + ("…" if len(pat) > 60 else "")
        return "Searching content", [
            f'"{display_pat}"' + (f" in {path}" if path else "")
        ]
    return f"Using {tool_name}", []
--- a/surfsense_backend/app/agents/chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/__init__.py
@ -0,0 +1,5 @@
 """Chat agents category.
 Groups the conversational agents that share a kernel: ``anonymous_chat`` and
 ``multi_agent_chat``. Code shared by *both* lives in ``chat/shared/``.
 """
--- a/surfsense_backend/app/agents/chat/anonymous_chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/anonymous_chat/__init__.py
@ -0,0 +1,14 @@
 """Anonymous / free-chat agent.
 The no-login chat experience: a deliberately minimal agent that bypasses the
 full SurfSense deep-agent stack (filesystem, knowledge-base persistence,
 subagents, skills, memory) and answers with an optional ``web_search`` tool and
 an optional read-only uploaded document. See :mod:`.agent` for details.
 """
 from app.agents.chat.anonymous_chat.agent import (
    build_anonymous_system_prompt,
    create_anonymous_chat_agent,
 )
 __all__ = ["build_anonymous_system_prompt", "create_anonymous_chat_agent"]
--- a/surfsense_backend/app/agents/chat/anonymous_chat/agent.py
+++ b/surfsense_backend/app/agents/chat/anonymous_chat/agent.py
@ -27,12 +27,12 @@ from langchain.agents.middleware import (
 from langchain_core.language_models import BaseChatModel
 from langgraph.types import Checkpointer
-from app.agents.new_chat.context import SurfSenseContextSchema
+from app.agents.chat.shared.context import SurfSenseContextSchema
-from app.agents.new_chat.middleware import (
+from app.agents.chat.shared.middleware import (
    RetryAfterMiddleware,
    create_surfsense_compaction_middleware,
 )
-from app.agents.new_chat.tools.web_search import create_web_search_tool
+from app.agents.chat.shared.tools.web_search import create_web_search_tool
 # Cap how much of an uploaded document we inline into the system prompt. The
 # upload endpoint allows files up to several MB, but the doc is re-sent on
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/constants.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/constants.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/prune_tool_names.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/context_prune/prune_tool_names.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/compile_graph_sync.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/graph/compile_graph_sync.py
@ -2,6 +2,7 @@
 from __future__ import annotations
 import time
 from collections.abc import Sequence
 from typing import Any
@ -11,13 +12,16 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
-from app.agents.multi_agent_chat.middleware.stack import (
+from app.agents.chat.multi_agent_chat.main_agent.middleware.stack import (
    build_main_agent_deepagent_middleware,
 )
-from app.agents.new_chat.context import SurfSenseContextSchema
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.shared.context import SurfSenseContextSchema
 from app.db import ChatVisibility
 from app.utils.perf import get_perf_logger
 _perf_log = get_perf_logger()
 def build_compiled_agent_graph_sync(
@ -43,6 +47,7 @@ def build_compiled_agent_graph_sync(
    disabled_tools: list[str] | None = None,
 ):
    """Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``)."""
    mw_start = time.perf_counter()
    main_agent_middleware = build_main_agent_deepagent_middleware(
        llm=llm,
        tools=tools,
@ -63,7 +68,9 @@ def build_compiled_agent_graph_sync(
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
    )
    mw_elapsed = time.perf_counter() - mw_start
    create_start = time.perf_counter()
    agent = create_agent(
        llm,
        system_prompt=final_system_prompt,
@ -72,6 +79,15 @@ def build_compiled_agent_graph_sync(
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )
    create_elapsed = time.perf_counter() - create_start
    _perf_log.info(
        "[graph_compile] middleware_build=%.3fs main_create_agent=%.3fs "
        "total=%.3fs mw_count=%d",
        mw_elapsed,
        create_elapsed,
        mw_elapsed + create_elapsed,
        len(main_agent_middleware),
    )
    return agent.with_config(
        {
            "recursion_limit": 10_000,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/__init__.py
@ -0,0 +1,10 @@
 """Action-log middleware: audit row per tool call (impl + builder)."""
 from .builder import build_action_log_mw
 from .middleware import ActionLogMiddleware, ToolDefinition
 __all__ = [
    "ActionLogMiddleware",
    "ToolDefinition",
    "build_action_log_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/builder.py
@ -4,11 +4,10 @@ from __future__ import annotations
 import logging
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import ActionLogMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
 from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
-from ..shared.flags import enabled
+from .middleware import ActionLogMiddleware
 def build_action_log_mw(
@ -21,12 +20,13 @@ def build_action_log_mw(
    if not enabled(flags, "enable_action_log") or thread_id is None:
        return None
    try:
-        tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
+        # No built-in tool declares a ``reverse`` callable yet, so the action
        # log runs without a tool_definitions map. Reversibility is opt-in per
        # tool via ``ToolDefinition.reverse`` and can be wired here when used.
        return ActionLogMiddleware(
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
            tool_definitions=tool_defs_by_name,
        )
    except Exception:  # pragma: no cover - defensive
        logging.warning(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/action_log/middleware.py
@ -1,25 +1,15 @@
 """Append-only action-log middleware for the SurfSense agent.
-Wraps every tool call via :meth:`AgentMiddleware.awrap_tool_call` and writes
+Wraps every tool call and writes a row to :class:`~app.db.AgentActionLog`
-a row to :class:`~app.db.AgentActionLog` after the tool returns. Tools opt
+after the tool returns. Tools opt into reversibility via a ``reverse``
-into reversibility by declaring a ``reverse`` callable on their
+callable on their :class:`ToolDefinition`; the rendered descriptor powers
 :class:`~app.agents.new_chat.tools.registry.ToolDefinition`; the rendered
 descriptor is persisted in ``reverse_descriptor`` for use by
 ``/api/threads/{thread_id}/revert/{action_id}``.
-Design points:
+Logging is fully defensive — DB-write failures are swallowed so the tool's
-
+result is always returned untouched. Only metadata (name, capped args,
-* **Defensive.** Logging never blocks the agent. We catch every exception
+result_id, reverse_descriptor) is stored; tool output stays in the
-  on the DB write path and emit a warning; the tool's ``ToolMessage``
+checkpoint. Reversibility is best-effort: a reverse callable that raises
-  result is always returned untouched.
+just leaves the action non-reversible.
 * **Lightweight payload.** Only the tool ``name`` + ``args`` (capped) +
  ``result_id`` + ``reverse_descriptor`` are stored. Tool output text
  remains in the LangGraph checkpoint / spilled tool-output files.
 * **Best-effort reversibility.** We invoke ``reverse(args, result_obj)``
  with the parsed JSON result when the tool's content is a JSON object;
  otherwise the raw text is passed. Exceptions in the reverse callable
  are swallowed and logged — a failed descriptor render simply means the
  action is NOT marked reversible.
 """
 from __future__ import annotations
@ -27,14 +17,14 @@ from __future__ import annotations
 import json
 import logging
 from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 from langchain.agents.middleware import AgentMiddleware
 from langchain_core.callbacks import adispatch_custom_event
 from langchain_core.messages import ToolMessage
-from app.agents.new_chat.feature_flags import get_flags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
 from app.agents.new_chat.tools.registry import ToolDefinition
 if TYPE_CHECKING:  # pragma: no cover - type-only
    from langchain.agents.middleware.types import ToolCallRequest
@ -44,6 +34,31 @@ if TYPE_CHECKING:  # pragma: no cover - type-only
 logger = logging.getLogger(__name__)
@dataclass
 class ToolDefinition:
    """Reversibility descriptor consumed by :class:`ActionLogMiddleware`.

    Only ``name`` and ``reverse`` are read by the middleware; the remaining
    fields let callers and tests describe a tool declaratively. A tool is
    marked reversible in the action log when ``reverse`` is set and renders a
    descriptor without raising.

    Attributes:
        name: Unique identifier for the tool.
        description: Human-readable description of what the tool does.
        factory: Optional callable that builds the tool (unused by the
            middleware; retained for declarative call sites/tests).
        reverse: Optional callable that, given the tool's ``(args, result)``,
            returns a ``ReverseDescriptor`` describing the inverse invocation.
    """
    # The only required field; every other field has a default.
    name: str
    # Defaults to an empty string when no description is supplied.
    description: str = ""
    # Takes a single ``dict[str, Any]`` argument; ``None`` means no factory.
    factory: Callable[[dict[str, Any]], Any] | None = None
    # Annotated as returning a plain ``dict[str, Any]``; ``None`` (the
    # default) means the tool declares no reverse callable.
    reverse: Callable[[dict[str, Any], Any], dict[str, Any]] | None = None
 # Cap for the persisted ``args`` JSON to avoid bloating the action log with
 # accidentally-huge inputs. Values are truncated and a flag is set in the
 # stored payload so consumers can detect truncation.
@ -93,18 +108,32 @@ class ActionLogMiddleware(AgentMiddleware):
        self._user_id = user_id
        self._tool_definitions = dict(tool_definitions or {})
-    def _enabled(self) -> bool:
+    def _enabled(self, thread_id: int | None) -> bool:
        flags = get_flags()
        if flags.disable_new_agent_stack:
            return False
-        return bool(flags.enable_action_log) and self._thread_id is not None
+        return bool(flags.enable_action_log) and thread_id is not None
    def _resolve_thread_id(self, request: ToolCallRequest) -> int | None:
        """Pick the thread id to attribute this tool call to.

        The id found on the request's runtime config wins, so one cached
        compiled graph can serve many threads without logging every row
        against the thread that first built it. When the request yields no
        id (legacy/test runtimes that don't surface a config), the value
        captured by the constructor is used instead.
        """
        live_thread_id = _resolve_thread_id(request)
        if live_thread_id is None:
            # Nothing on the request: fall back to the constructor value.
            return self._thread_id
        return live_thread_id
    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command[Any]]],
    ) -> ToolMessage | Command[Any]:
-        if not self._enabled():
+        thread_id = self._resolve_thread_id(request)
        if not self._enabled(thread_id):
            return await handler(request)
        result: ToolMessage | Command[Any]
@ -119,10 +148,16 @@ class ActionLogMiddleware(AgentMiddleware):
                request=request,
                result=None,
                error_payload=error_payload,
                thread_id=thread_id,
            )
            raise
-        await self._record(request=request, result=result, error_payload=None)
+        await self._record(
            request=request,
            result=result,
            error_payload=None,
            thread_id=thread_id,
        )
        return result
    async def _record(
@ -131,6 +166,7 @@ class ActionLogMiddleware(AgentMiddleware):
        request: ToolCallRequest,
        result: ToolMessage | Command[Any] | None,
        error_payload: dict[str, Any] | None,
        thread_id: int | None,
    ) -> None:
        """Persist one ``agent_action_log`` row. Defensive: never raises."""
        try:
@ -149,7 +185,7 @@ class ActionLogMiddleware(AgentMiddleware):
            chat_turn_id = _resolve_chat_turn_id(request)
            row = AgentActionLog(
-                thread_id=self._thread_id,
+                thread_id=thread_id,
                user_id=self._user_id,
                search_space_id=self._search_space_id,
                # ``turn_id`` is the deprecated alias of ``tool_call_id``
@ -178,11 +214,9 @@ class ActionLogMiddleware(AgentMiddleware):
            )
            return
-        # Surface a side-channel SSE event so the chat tool card can
+        # Side-channel event (relayed by ``stream_new_chat`` as a
-        # render a Revert button immediately after the row is durable.
+        # ``data-action-log`` SSE) so the tool card can show a Revert button
-        # ``stream_new_chat`` translates this into a
+        # once the row is durable. Carries a presence flag, not the descriptor.
        # ``data-action-log`` SSE event. We DO NOT include the
        # ``reverse_descriptor`` payload here; only a presence flag.
        try:
            await adispatch_custom_event(
                "action_log",
@ -337,6 +371,36 @@ def _resolve_chat_turn_id(request: Any) -> str | None:
    return None
 def _resolve_thread_id(request: Any) -> int | None:
    """Return ``configurable.thread_id`` (as int) for this request, if accessible.
    Mirrors :func:`_resolve_chat_turn_id`: ``ToolRuntime.config`` is exposed by
    LangGraph at ``request.runtime.config``, and the chat thread id lives at
    ``configurable.thread_id`` (a stringified ``chat_id`` at the main-graph
    level). Returns ``None`` when absent or unparseable so the caller can fall
    back to the constructor value.
    """
    try:
        runtime = getattr(request, "runtime", None)
        if runtime is None:
            return None
        config = getattr(runtime, "config", None)
        if not isinstance(config, dict):
            return None
        configurable = config.get("configurable")
        if not isinstance(configurable, dict):
            return None
        value = configurable.get("thread_id")
        if value is None:
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            return None
    except Exception:  # pragma: no cover - defensive
        return None
 def _resolve_message_id(request: Any) -> str | None:
    """Return the message correlator for ``request``.

    Tool-call IDs serve as the best-available message correlator at this
    layer, so this simply delegates to :func:`_resolve_tool_call_id` and
    returns whatever it yields.
    """
    return _resolve_tool_call_id(request)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/__init__.py
@ -0,0 +1,9 @@
 """Anonymous-document middleware: Redis hydration, cloud only (impl + builder)."""
 from .builder import build_anonymous_doc_mw
 from .middleware import AnonymousDocumentMiddleware
 __all__ = [
    "AnonymousDocumentMiddleware",
    "build_anonymous_doc_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/builder.py
@ -2,8 +2,9 @@
 from __future__ import annotations
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
+
 from .middleware import AnonymousDocumentMiddleware
 def build_anonymous_doc_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
@ -24,8 +24,13 @@ from typing import Any
 from langchain.agents.middleware import AgentMiddleware, AgentState
 from langgraph.runtime import Runtime
-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
+from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
-from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT, safe_filename
+    SurfSenseFilesystemState,
 )
 from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    safe_filename,
 )
 logger = logging.getLogger(__name__)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/__init__.py
@ -0,0 +1,25 @@
 """Per-turn cooperative busy-lock middleware + cancel primitives (main-agent)."""
 from .builder import build_busy_mutex_mw
 from .middleware import (
    BusyMutexMiddleware,
    end_turn,
    get_cancel_event,
    get_cancel_state,
    is_cancel_requested,
    manager,
    request_cancel,
    reset_cancel,
 )
 __all__ = [
    "BusyMutexMiddleware",
    "build_busy_mutex_mw",
    "end_turn",
    "get_cancel_event",
    "get_cancel_state",
    "is_cancel_requested",
    "manager",
    "request_cancel",
    "reset_cancel",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/builder.py
@ -2,10 +2,12 @@
 from __future__ import annotations
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import BusyMutexMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
-from ..shared.flags import enabled
+from .middleware import (
    BusyMutexMiddleware,
 )
 def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/busy_mutex/middleware.py
@ -1,32 +1,12 @@
-"""
+"""Per-thread asyncio lock + cooperative cancel token, keyed by ``thread_id``.
 BusyMutexMiddleware — per-thread asyncio lock + cancel token.
-LangChain has no built-in concept of "this thread is already running a
+Refuses a second concurrent turn on the same thread (e.g. double-clicked
-turn — refuse the second concurrent request". Without it, a user
+"send") that would otherwise race on the same checkpoint and duplicate tool
-double-clicking "send" or refreshing the page mid-stream can spawn two
+calls. Also exposes a per-thread cancel event that long-running tools poll
-turns racing on the same checkpoint, producing duplicated tool calls
+via ``runtime.context.cancel_event.is_set()`` to abort cooperatively.
 and mangled state.
-Ported from OpenCode's ``Stream.scoped(AbortController)`` pattern: a
+Process-local and in-memory; multi-worker deployments need a distributed lock
-single-process, in-memory lock + cooperative cancellation token keyed by
+(Redis / PostgreSQL advisory locks) as a follow-up.
 ``thread_id``. For multi-worker deployments a distributed lock backend
 (Redis or PostgreSQL advisory locks) is a phase-2 follow-up.
 What this provides:
 - A ``WeakValueDictionary[str, asyncio.Lock]`` keyed by ``thread_id``;
  acquiring the lock during ``before_agent`` blocks any concurrent
  prompt on the same thread until release.
 - A per-thread ``asyncio.Event`` (``cancel_event``) that long-running
  tools can poll to abort cooperatively. The event is reset between
  turns. Tools should check ``runtime.context.cancel_event.is_set()``
  in tight inner loops.
 - A typed :class:`~app.agents.new_chat.errors.BusyError` raised when a
  second turn arrives while the lock is held.
 Note: SurfSense's ``stream_new_chat`` is the call site that should
 acquire/release. Wiring this as middleware means the contract is
 explicit and the lock manager is shared with subagents that compile
 their own ``create_agent`` runnables.
 """
 from __future__ import annotations
@ -46,7 +26,7 @@ from langchain.agents.middleware.types import (
 from langgraph.config import get_config
 from langgraph.runtime import Runtime
-from app.agents.new_chat.errors import BusyError
+from app.agents.chat.runtime.errors import BusyError
 logger = logging.getLogger(__name__)
@ -152,9 +132,8 @@ class _ThreadLockManager:
        return True
-# Module-level singleton — process-local but reused across all agent
+# Process-local singleton shared across all agents/subagents built in this
-# instances built in this process. Subagents created in nested
+# process so per-thread locks stay coherent.
 # ``create_agent`` calls also get this so locks are coherent.
 manager = _ThreadLockManager()
@ -266,7 +245,6 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
        await lock.acquire()
        epoch = manager.bump_turn_epoch(thread_id)
        self._held_locks[thread_id] = (lock, epoch)
        # Reset the cancel event so this turn starts fresh
        reset_cancel(thread_id)
        return None
@ -289,17 +267,14 @@ class BusyMutexMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, Respo
            return None
        if lock.locked():
            lock.release()
-        # Always clear cancel event between turns so a stale signal
+        # Clear cancel event so a stale signal doesn't leak into the next turn.
        # doesn't leak into the next request.
        reset_cancel(thread_id)
        return None
    # Provide sync no-ops because the middleware base class allows them
    def before_agent(  # type: ignore[override]
        self, state: AgentState[Any], runtime: Runtime[ContextT]
    ) -> dict[str, Any] | None:
-        # Sync path: no asyncio.Lock to acquire. Best we can do is reject
+        # Sync path can't await an asyncio.Lock; only reject if one is in flight.
        # if anyone else is in flight.
        thread_id = self._thread_id(runtime)
        if thread_id is None:
            if self._require_thread_id:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/init.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/init.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/config.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/config.py
@ -1,7 +1,9 @@
-"""RunnableConfig wiring for nested subagent invocations.
+"""HITL resume side-channel for nested subagent invocations.
-Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and
+Exposes the configurable side-channel ``stream_resume_chat`` uses to ferry
-exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
+resume payloads into a mid-flight subagent. The ``RunnableConfig`` builder and
 state-key filter shared with subagents live in
 ``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
 """
 from __future__ import annotations
@ -11,8 +13,6 @@ from typing import Any
 from langchain.tools import ToolRuntime
 from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
 logger = logging.getLogger(__name__)
 # langgraph stores the parent task's scratchpad under this configurable key;
@ -20,39 +20,6 @@ logger = logging.getLogger(__name__)
 _LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
 def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
    """Build the ``RunnableConfig`` used for a nested subagent invoke.

    Two adjustments are made to a shallow copy of ``runtime.config``:

    * ``recursion_limit`` is raised to ``DEFAULT_SUBAGENT_RECURSION_LIMIT``
      when it is missing, unparseable, or lower than that value.
    * ``configurable.thread_id`` becomes
      ``{parent_thread}::task:{tool_call_id}`` (or just
      ``task:{tool_call_id}`` when there is no parent id), giving each
      subagent call its own checkpoint slot. ``tool_call_id`` is stable per
      LLM-emitted call, so a resumed call reads the same snapshot.

    The namespace lives in ``thread_id`` rather than ``checkpoint_ns``
    because langgraph's ``aget_state`` treats a non-empty ``checkpoint_ns``
    as a subgraph path and raises ``ValueError("Subgraph X not found")``.
    """
    config: dict[str, Any] = dict(runtime.config) if runtime.config else {}

    raw_limit = config.get("recursion_limit")
    try:
        limit = 0 if raw_limit is None else int(raw_limit)
    except (TypeError, ValueError):
        # An unparseable limit is treated like a missing one.
        limit = 0
    if limit < DEFAULT_SUBAGENT_RECURSION_LIMIT:
        config["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT

    # Copy so the parent's ``configurable`` mapping is never mutated.
    configurable: dict[str, Any] = dict(config.get("configurable") or {})
    parent = configurable.get("thread_id")
    task_key = f"task:{runtime.tool_call_id}"
    if parent:
        configurable["thread_id"] = f"{parent}::{task_key}"
    else:
        configurable["thread_id"] = task_key
    config["configurable"] = configurable
    return config
 def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
    """Pop the resume payload for *this* call's ``tool_call_id``.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/constants.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/constants.py
@ -1,24 +1,14 @@
-"""Constants shared by the checkpointed subagent middleware."""
+"""Tuning constants for the checkpointed subagent middleware.
 ``EXCLUDED_STATE_KEYS`` and ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` are part of the
 subagent-invocation contract shared with subagents and now live in
 ``app.agents.chat.multi_agent_chat.subagents.shared.invocation``.
 """
 from __future__ import annotations
 import os
 # Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
 EXCLUDED_STATE_KEYS = frozenset(
    {
        "messages",
        "todos",
        "structured_response",
        "skills_metadata",
        "memory_contents",
    }
 )
 # Match the parent graph's budget; the LangGraph default of 25 trips on
 # multi-step subagent runs.
 DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000
 def _read_timeout_env(name: str, default: float) -> float:
    """Parse ``name`` from the environment; fall back to ``default`` on bad values.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/middleware.py
@ -0,0 +1,188 @@
 """SubAgent middleware that compiles each subagent against the parent checkpointer."""
 from __future__ import annotations
 import time
 from collections.abc import Callable
 from typing import Any, cast
 from deepagents.backends.protocol import BackendFactory, BackendProtocol
 from deepagents.middleware.subagents import (
    TASK_SYSTEM_PROMPT,
    CompiledSubAgent,
    SubAgent,
    SubAgentMiddleware,
 )
 from langchain.agents import create_agent
 from langchain.chat_models import init_chat_model
 from langchain_core.runnables import Runnable
 from langgraph.types import Checkpointer
 from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
    SURF_CONTEXT_HINT_PROVIDER_KEY,
    SURF_LAZY_SPEC_FACTORY_KEY,
 )
 from app.utils.perf import get_perf_logger
 from .task_tool import build_task_tool_with_parent_config
 _perf_log = get_perf_logger()
 class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
     """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.
     Subagent graphs are compiled lazily: ``__init__`` only indexes the specs
     and builds the ``task`` tool from lightweight descriptors. The
     ``create_agent`` call for a given subagent happens on its first
     ``task(name)`` use (see :meth:`_resolve_subagent`) and the compiled graph
     is memoized on this instance.
     """
     def __init__(
         self,
         *,
         checkpointer: Checkpointer,
         backend: BackendProtocol | BackendFactory,
         subagents: list[SubAgent | CompiledSubAgent],
         system_prompt: str | None = TASK_SYSTEM_PROMPT,
         task_description: str | None = None,
         search_space_id: int | None = None,
     ) -> None:
         """Index ``subagents`` and build the ``task`` tool without compiling any graph.
         Args:
             checkpointer: Passed as ``checkpointer=`` to ``create_agent`` for
                 every subagent this instance compiles.
             backend: Stored as ``self._backend``; not used further in this class.
             subagents: Subagent specs. Each must carry ``name`` and
                 ``description``; see :meth:`_build_subagent_registry` for how
                 the three supported shapes are indexed.
             system_prompt: Base prompt. When truthy, a catalog of subagent
                 names/descriptions is appended; otherwise stored unchanged.
             task_description: Forwarded to ``build_task_tool_with_parent_config``.
             search_space_id: Stored on the instance and forwarded to the
                 ``task`` tool builder.
         Raises:
             ValueError: If ``subagents`` is empty, or an eagerly supplied
                 (non-factory, non-compiled) spec lacks ``model`` or ``tools``.
         """
         self._surf_checkpointer = checkpointer
         # Two-argument ``super`` starts the MRO lookup *after*
         # ``SubAgentMiddleware``, so the upstream ``__init__`` is skipped.
         # NOTE(review): presumably to avoid upstream's own subagent setup —
         # confirm against the deepagents version in use.
         super(SubAgentMiddleware, self).__init__()
         if not subagents:
             raise ValueError(
                 "At least one subagent must be specified when using the new API"
             )
         self._backend = backend
         self._subagents = subagents
         # Search-space id is captured at build time (the orchestrator runs in
         # exactly one search space for its lifetime). The spawn-paused kill
         # switch keys on it so an operator can quarantine one workspace
         # without affecting the rest of the deployment.
         self._search_space_id = search_space_id
         # Lazy subagent compilation. Compiling a subagent graph via
         # ``create_agent`` is expensive (~250-400ms each) and there can be up
         # to ~17 of them. Doing it all in ``__init__`` put the full cost on
         # every cold ``agent_cache`` miss (i.e. on time-to-first-token), even
         # though a turn usually invokes zero or one subagent. We instead index
         # the raw specs here and compile each graph on first ``task(name)``
         # use, memoizing the result for the life of this (cached) instance.
         self._compiled: dict[str, Runnable] = {}
         self._lazy_specs: dict[str, dict[str, Any]] = {}
         # Subagents whose *spec itself* is built lazily (not just compiled).
         # Keyed by name → zero-arg factory returning the full spec dict. Used
         # for the write knowledge_base subagent, whose filesystem middleware
         # builds ~13 tool schemas (~2s) that almost never matter on turn 1.
         self._lazy_spec_factories: dict[str, Callable[[], dict[str, Any]]] = {}
         descriptors = self._build_subagent_registry()
         task_tool = build_task_tool_with_parent_config(
             descriptors,
             task_description,
             search_space_id=search_space_id,
             resolve_subagent=self._resolve_subagent,
         )
         if system_prompt and descriptors:
             agents_desc = "\n".join(
                 f"- {s['name']}: {s['description']}" for s in descriptors
             )
             self.system_prompt = (
                 system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
             )
         else:
             self.system_prompt = system_prompt
         self.tools = [task_tool]
     @staticmethod
     def _require_model_and_tools(name: str, spec: dict[str, Any]) -> None:
         """Raise ``ValueError`` unless ``spec`` carries both ``model`` and ``tools``.
         Shared by the eager and lazy-factory paths so a malformed spec is
         reported with the same message regardless of when it is built.
         """
         for key in ("model", "tools"):
             if key not in spec:
                 msg = f"SubAgent '{name}' must specify '{key}'"
                 raise ValueError(msg)
     def _build_subagent_registry(self) -> list[dict[str, Any]]:
         """Index subagents for lazy compilation; return lightweight descriptors.
         Each spec in ``self._subagents`` is routed by shape:
         * carries ``SURF_LAZY_SPEC_FACTORY_KEY`` → factory stored by name; the
           real spec dict is built on first use;
         * carries ``runnable`` → seeded directly into the compiled memo;
         * otherwise → validated for ``model``/``tools`` and stashed by name for
           compilation on first ``task(...)`` use via :meth:`_resolve_subagent`.
         The returned descriptors carry only ``name``/``description`` plus the
         optional context-hint provider — everything the ``task`` tool needs to
         validate names, render its catalog, and run hints, without paying the
         ``create_agent`` cost up front.
         Raises:
             ValueError: If an eager, uncompiled spec lacks ``model`` or ``tools``.
         """
         descriptors: list[dict[str, Any]] = []
         for spec in self._subagents:
             # Provider may be ``None`` (no hint), in which case task_tool skips
             # the prepend step. We forward the key unconditionally so the
             # descriptor shape is uniform.
             hint_provider = cast(dict, spec).get(SURF_CONTEXT_HINT_PROVIDER_KEY)
             name = spec["name"]
             spec_factory = cast(dict, spec).get(SURF_LAZY_SPEC_FACTORY_KEY)
             if spec_factory is not None:
                 # Descriptor-only entry: the spec dict is built on first use.
                 self._lazy_spec_factories[name] = spec_factory
             elif "runnable" in spec:
                 compiled = cast(CompiledSubAgent, spec)
                 self._compiled[name] = compiled["runnable"]
             else:
                 self._require_model_and_tools(name, cast(dict, spec))
                 self._lazy_specs[name] = cast(dict, spec)
             descriptors.append(
                 {
                     "name": name,
                     "description": spec["description"],
                     SURF_CONTEXT_HINT_PROVIDER_KEY: hint_provider,
                 }
             )
         return descriptors
     def _resolve_subagent(self, name: str) -> Runnable:
         """Return the compiled subagent graph for ``name``, compiling on first use.
         Memoized: the ``create_agent`` cost is paid once per subagent per
         cached middleware instance.
         Raises:
             KeyError: ``name`` is not a registered subagent (callers in the
                 ``task`` tool validate membership before resolving).
             ValueError: A lazily built spec lacks ``model`` or ``tools``. This
                 is kept distinct from ``KeyError`` so a malformed factory
                 result is never mistaken for an unknown subagent name.
         """
         cached = self._compiled.get(name)
         if cached is not None:
             return cached
         spec = self._lazy_specs.get(name)
         if spec is None:
             factory = self._lazy_spec_factories.get(name)
             if factory is None:
                 raise KeyError(name)
             # Build the spec on first use (pays the deferred construction cost
             # here, off the cold agent-build path), then compile and memoize.
             build_start = time.perf_counter()
             spec = factory()
             _perf_log.info(
                 "[subagent_spec_lazy] name=%s (deferred spec build) in %.3fs",
                 name,
                 time.perf_counter() - build_start,
             )
             # Factory output bypassed the registry-time checks; apply the same
             # contract before handing the spec to ``_compile_one``.
             self._require_model_and_tools(name, spec)
         runnable = self._compile_one(spec)
         self._compiled[name] = runnable
         return runnable
     def _compile_one(self, spec: dict[str, Any]) -> Runnable:
         """Compile a single subagent graph against the parent checkpointer.
         Args:
             spec: Full subagent spec. Reads ``model``, ``system_prompt``,
                 ``tools`` and ``name``; ``middleware`` is optional and defaults
                 to an empty list. A string ``model`` is resolved through
                 ``init_chat_model``.
         Returns:
             The graph returned by ``create_agent``; compile time, tool count
             and middleware count are written to the perf log.
         """
         model = spec["model"]
         if isinstance(model, str):
             model = init_chat_model(model)
         # Copy so the spec's own middleware list is never shared with the graph.
         middleware: list[Any] = list(spec.get("middleware", []))
         tools_count = len(spec.get("tools") or [])
         mw_count = len(middleware)
         compile_start = time.perf_counter()
         runnable = create_agent(
             model,
             system_prompt=spec["system_prompt"],
             tools=spec["tools"],
             middleware=middleware,
             name=spec["name"],
             checkpointer=self._surf_checkpointer,
         )
         _perf_log.info(
             "[subagent_compile_lazy] name=%s in %.3fs tools=%d mw=%d",
             spec["name"],
             time.perf_counter() - compile_start,
             tools_count,
             mw_count,
         )
         return runnable
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/propagation.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/propagation.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume_routing.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/resume_routing.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/spawn_paused.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/spawn_paused.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_description.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_description.py
@ -6,7 +6,7 @@ and the ``<tools>`` block render from the same source.
 from __future__ import annotations
-from app.agents.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
+from app.agents.chat.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
    read_prompt_md,
 )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
@ -12,7 +12,7 @@ import asyncio
 import json
 import logging
 import time
-from collections.abc import Awaitable
+from collections.abc import Awaitable, Callable
 from typing import Annotated, Any, NoReturn, TypeVar
 from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
@ -23,7 +23,11 @@ from langchain_core.tools import StructuredTool
 from langgraph.errors import GraphInterrupt
 from langgraph.types import Command, Interrupt
-from app.agents.multi_agent_chat.subagents.shared.spec import (
+from app.agents.chat.multi_agent_chat.subagents.shared.invocation import (
    EXCLUDED_STATE_KEYS,
    subagent_invoke_config,
 )
 from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
    SURF_CONTEXT_HINT_PROVIDER_KEY,
    ContextHintProvider,
 )
@ -34,13 +38,11 @@ from .config import (
    consume_surfsense_resume,
    drain_parent_null_resume,
    has_surfsense_resume,
    subagent_invoke_config,
 )
 from .constants import (
    DEFAULT_SUBAGENT_BATCH_CONCURRENCY,
    DEFAULT_SUBAGENT_BILLABLE_THRESHOLD,
    DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS,
    EXCLUDED_STATE_KEYS,
    MAX_SUBAGENT_BATCH_SIZE,
 )
 from .propagation import wrap_with_tool_call_id
@ -80,13 +82,10 @@ _T = TypeVar("_T")
 async def _ainvoke_with_timeout[T](
    coro: Awaitable[_T], *, subagent_type: str, started_at: float
 ) -> _T:
-    """Apply :data:`DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS` to ``coro``.
+    """Apply the subagent invoke timeout to ``coro`` (non-positive disables it).
-    A non-positive timeout disables the cap (configurable via the
+    On expiry the task is cancelled and :class:`SubagentInvokeTimeoutError` is
-    ``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` env var). On expiry the
+    raised for the caller to turn into a synthetic ToolMessage.
    underlying task is cancelled and :class:`SubagentInvokeTimeoutError` is
    raised — the caller wraps it into a synthetic ToolMessage so the
    orchestrator can decide what to do.
    """
    timeout = DEFAULT_SUBAGENT_INVOKE_TIMEOUT_SECONDS
    if timeout <= 0:
@ -144,17 +143,31 @@ def build_task_tool_with_parent_config(
    task_description: str | None = None,
    *,
    search_space_id: int | None = None,
    resolve_subagent: Callable[[str], Runnable] | None = None,
 ) -> BaseTool:
-    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging."""
+    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.
-    subagent_graphs: dict[str, Runnable] = {
+
-        spec["name"]: spec["runnable"] for spec in subagents
+    ``subagents`` are lightweight descriptors (``name``/``description`` + the
-    }
+    optional context-hint provider); the actual compiled graph is fetched
-    # Per-subagent context-hint providers (see ``SurfSenseSubagentSpec``).
+    lazily via ``resolve_subagent(name)`` so subagent ``create_agent`` cost is
-    # The mapping is sparse: only routes that opted in via ``pack_subagent``
+    paid on first ``task(name)`` use rather than at graph-build time.
-    # appear here, and the value is invoked once per ``task(...)`` call to
+
-    # generate a short string prepended to the subagent's first
+    For backward compatibility (and tests), ``resolve_subagent`` may be omitted
-    # ``HumanMessage``. Failures are logged and swallowed — a broken hint
+    when every descriptor already carries a pre-compiled ``runnable``; in that
-    # provider must never prevent the underlying task from running.
+    case a trivial dict-backed resolver is used.
    """
    subagent_names: set[str] = {spec["name"] for spec in subagents}
    if resolve_subagent is None:
        _eager_graphs: dict[str, Runnable] = {
            spec["name"]: spec["runnable"] for spec in subagents if "runnable" in spec
        }
        def resolve_subagent(name: str) -> Runnable:
            return _eager_graphs[name]
    # Sparse map of opt-in context-hint providers; each runs once per task()
    # call to prepend a string to the subagent's first HumanMessage. Failures
    # are swallowed so a broken hint never blocks the task.
    subagent_hint_providers: dict[str, ContextHintProvider] = {
        spec["name"]: provider
        for spec in subagents
@ -176,24 +189,18 @@ def build_task_tool_with_parent_config(
    def _billable_call_update(
        subagent_type: str, runtime: ToolRuntime
    ) -> dict[str, Any]:
-        """Build the per-call ``billable_calls`` delta + an optional warning.
+        """Build the per-call ``billable_calls`` delta plus an optional soft-cap warning.
-        The orchestrator's ``billable_calls`` map is summed by
+        Always emits ``{subagent_type: 1}`` (a reducer accumulates it); when this
-        :func:`_int_counter_merge_reducer`, so we always emit
+        call would cross the threshold, also adds a soft ``messages`` entry so the
-        ``{subagent_type: 1}`` and let the reducer accumulate. If the
+        orchestrator self-limits on its next step.
        cumulative count *after* this call would cross the configured
        threshold, we also slip a soft ``messages`` entry into the update
        so the orchestrator can read it on its next step and self-limit.
        Returning a plain ``dict`` (vs. an extra :class:`Command`) keeps
        the helper composable with the existing single/batch return paths.
        """
        delta: dict[str, Any] = {"billable_calls": {subagent_type: 1}}
        threshold = DEFAULT_SUBAGENT_BILLABLE_THRESHOLD
        if threshold <= 0:
            return delta
        prior = runtime.state.get("billable_calls") or {}
-        # ``prior`` may be a plain dict or a reducer-managed mapping; only
+        # Count int values only so a malformed checkpoint can't crash us.
        # int values are counted so a malformed checkpoint can't crash us.
        prior_total = sum(v for v in prior.values() if isinstance(v, int))
        new_total = prior_total + 1
        if prior_total < threshold <= new_total:
@ -212,8 +219,7 @@ def build_task_tool_with_parent_config(
        """Merge the per-call billable counter (and warning) into ``cmd``."""
        delta = _billable_call_update(subagent_type, runtime)
        warn_text = delta.pop("_billable_warn_text", None)
-        # ``cmd.update`` may be a dict or LangGraph ``UpdateDict``; defensively
+        # Copy so we don't mutate state shared with other tool returns.
        # copy so we don't mutate state shared across other tool returns.
        update = dict(getattr(cmd, "update", {}) or {})
        for key, value in delta.items():
            update[key] = value
@ -226,14 +232,10 @@ def build_task_tool_with_parent_config(
        return Command(update=update)
    def _safe_message_text(msg: Any) -> str:
-        """Pull text out of a BaseMessage without trusting the ``.text`` property.
+        """Pull text out of a BaseMessage without using the ``.text`` property.
-        ``BaseMessage.text`` walks ``content_blocks`` and crashes with
+        ``.text`` crashes when ``content`` is ``None`` (common for tool-call
-        ``TypeError: 'NoneType' object is not iterable`` when ``content`` is
+        AIMessages), and ``getattr`` won't catch it, so read ``content`` directly.
        ``None`` (common for tool-call AIMessages whose payload is purely
        structured). ``getattr(msg, "text", None)`` does not catch this
        because Python evaluates the property body before falling back to
        the default. Read ``content`` directly and coerce defensively.
        """
        try:
            content = getattr(msg, "content", None)
@ -256,23 +258,18 @@ def build_task_tool_with_parent_config(
        return str(content)
    def _build_tool_trace(messages: list[Any]) -> list[dict[str, Any]]:
-        """Compress the subagent's message stream into a compact tool trace.
+        """Compress the subagent's messages into a compact tool trace.
-        Each entry is ``{"tool": <name>, "status": "ok"|"error", "preview":
+        Entries (``{tool, status, preview}``) ride on the ToolMessage's
-        <≤120 chars>}`` so the orchestrator can show "this is what your
+        ``additional_kwargs["surf_tool_trace"]`` for UI/observability; the LLM
-        specialist actually did" without dumping the full message stream
+        never sees them.
        back through the prompt. The list is attached to the returned
        ToolMessage's ``additional_kwargs`` (under ``"surf_tool_trace"``);
        the LLM never sees it, but UI / observability code can pluck it
        out of the checkpoint.
        """
        trace: list[dict[str, Any]] = []
        for msg in messages:
            tool_name = getattr(msg, "name", None)
            tool_call_id_attr = getattr(msg, "tool_call_id", None)
            if not tool_name and not tool_call_id_attr:
-                # Only ToolMessages have either field; skip AIMessage /
+                # Only ToolMessages carry either field.
                # HumanMessage / SystemMessage frames.
                continue
            status = getattr(msg, "status", None) or "ok"
            preview = _safe_message_text(msg).strip().replace("\n", " ")
@ -306,8 +303,7 @@ def build_task_tool_with_parent_config(
            )
            raise ValueError(msg)
        message_text = _safe_message_text(messages[-1]).rstrip()
-        # Tool-trace is purely observability — wrap defensively so a single
+        # Trace is observability-only; never let a bad frame kill the turn.
        # malformed frame never bubbles up and kills the whole user turn.
        try:
            tool_trace = _build_tool_trace(messages)
        except Exception:
@ -318,10 +314,7 @@ def build_task_tool_with_parent_config(
            tool_trace = []
        tool_msg = ToolMessage(message_text, tool_call_id=tool_call_id)
        if tool_trace:
-            # ``additional_kwargs`` is a free-form dict on BaseMessage; using
+            # surf_ prefix avoids collision with provider keys (e.g. cache_control).
            # a ``surf_`` prefix avoids collision with provider-specific keys
            # (e.g. Anthropic's ``cache_control``). The LLM doesn't see it;
            # consumers (UI, observability) read it off the checkpoint.
            tool_msg.additional_kwargs["surf_tool_trace"] = tool_trace
        return Command(
            update={
@ -353,15 +346,13 @@ def build_task_tool_with_parent_config(
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
-        subagent = subagent_graphs[subagent_type]
+        subagent = resolve_subagent(subagent_type)
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
        hint = _resolve_context_hint(subagent_type, description, runtime)
        if hint:
-            # Prepend as a tagged block so the subagent prompt can pattern-match
+            # Tagged block so the subagent prompt can pattern-match the section.
            # on the section (and a future change can lift it into its own
            # ``SystemMessage`` if needed).
            payload = f"<context_hint>\n{hint}\n</context_hint>\n\n{description}"
        else:
            payload = description
@ -372,16 +363,12 @@ def build_task_tool_with_parent_config(
        results: list[tuple[int, str, dict | str, dict | None]],
        runtime: ToolRuntime,
    ) -> Command:
-        """Combine per-child results into one Command with a combined ToolMessage.
+        """Combine per-child results into one Command with an aggregate ToolMessage.
-        ``results`` is a list of ``(task_index, subagent_type,
+        ``results`` tuples are ``(task_index, subagent_type, payload_or_error,
-        payload_or_error_text, child_state_update)`` tuples — preserving the
+        child_state_update)``; output blocks are sorted by index so the LLM can
-        input order so the orchestrator can map each block back to the task
+        map them back to dispatch order, and each child contributes a
-        it dispatched. State updates are merged by reducer for keys outside
+        ``billable_calls`` increment to match single-mode accounting.
        :data:`EXCLUDED_STATE_KEYS`; everything else (``messages``, ``todos``,
        etc.) is replaced by the synthesized aggregate ToolMessage. Every
        child also contributes a ``billable_calls`` increment so cost
        accounting matches single-mode dispatch.
        """
        results.sort(key=lambda r: r[0])
        merged_state: dict[str, Any] = {}
@ -422,8 +409,8 @@ def build_task_tool_with_parent_config(
                }
            )
            if state_update:
-                # Naive merge: later tasks win on scalar collisions; reducer-backed
+                # Later tasks win on scalar collisions; reducer-backed fields
-                # fields (``receipts``, ``files`` etc.) accumulate at apply time.
+                # accumulate at apply time.
                merged_state.update(state_update)
        aggregate = "\n\n".join(message_blocks)
        aggregate_msg = ToolMessage(
@ -467,15 +454,13 @@ def build_task_tool_with_parent_config(
    ) -> tuple[int, str, dict | str, dict | None]:
        """Run one child of a batched ``task`` call under the concurrency cap.
-        Errors are returned as plain text in slot 2 so a single child's
+        Errors are returned as text (slot 2) so one child's failure doesn't abort
-        failure does not abort the whole batch. ``GraphInterrupt`` from a
+        the batch. A child's ``GraphInterrupt`` is a hard failure for that child:
-        batched child is currently treated as a hard failure for that child
+        batched HITL is intentionally out of scope.
        only — batched HITL is intentionally out of scope for the v1
        rollout (see plan tier 2 item 4 risks).
        """
        async with semaphore:
-            if subagent_type not in subagent_graphs:
+            if subagent_type not in subagent_names:
-                allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+                allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
                return (
                    task_index,
                    subagent_type,
@ -505,8 +490,7 @@ def build_task_tool_with_parent_config(
                )
                return (task_index, subagent_type, str(exc), None)
            except GraphInterrupt:
-                # Batched HITL is unsupported in v1 — surface as a failure
+                # Batched HITL unsupported; fail this child so the batch finishes.
                # for this child so the rest of the batch still completes.
                logger.warning(
                    "Batch child %d (%s) raised GraphInterrupt; batched HITL "
                    "is not supported. Re-dispatch this task as a single "
@ -543,14 +527,11 @@ def build_task_tool_with_parent_config(
            return (task_index, subagent_type, result, child_state_update)
    def _coerce_batch_arg(tasks: Any) -> list[dict] | str:
-        """Rescue common LLM-side malformations of the ``tasks`` argument.
+        """Rescue common LLM malformations of the ``tasks`` argument.
-        Some providers serialise an array argument as a JSON-encoded string,
+        Recovers a JSON-encoded array string and a single dict (instead of a
-        and small models occasionally hand back a single ``{description,
+        1-element array), logging a WARN. Unrecoverable shapes return a string
-        subagent_type}`` dict instead of a one-element array. Both are
+        the caller surfaces as the tool error.
        recovered here with a WARN log so the issue is visible in metrics
        but the user's turn still completes; truly broken shapes return a
        plain string that the caller surfaces as the tool error.
        """
        if isinstance(tasks, list):
            return tasks
@ -585,13 +566,10 @@ def build_task_tool_with_parent_config(
    async def _adispatch_batch(
        tasks: list[dict], runtime: ToolRuntime
    ) -> Command | str:
-        """Fan-out helper for the ``tasks`` array shape.
+        """Fan out the ``tasks`` array (size- and concurrency-capped).
-        Bounded by :data:`MAX_SUBAGENT_BATCH_SIZE` and concurrency-capped
+        Returns one Command; the LLM sees one ``[task <index>]``-prefixed block
-        at :data:`DEFAULT_SUBAGENT_BATCH_CONCURRENCY`. Returns a single
+        per child, in input order.
        :class:`Command` that the LLM sees as one ToolMessage per child,
        prefixed with ``[task <index>]`` so it can map back to the input
        order.
        """
        if not tasks:
            return "tasks: array is empty; nothing to dispatch."
@ -657,8 +635,8 @@ def build_task_tool_with_parent_config(
                "task: must provide either single-mode (`description`+`subagent_type`) "
                "or batch-mode (`tasks`)."
            )
-        if subagent_type not in subagent_graphs:
+        if subagent_type not in subagent_names:
-            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+            allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
@ -701,17 +679,16 @@ def build_task_tool_with_parent_config(
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
-                # Bridge invariant: a queued resume must accompany any pending
+                # A pending interrupt must have a queued resume; otherwise replay
-                # subagent interrupt. Fall-through replay would silently re-prompt
+                # would silently re-prompt the user. Raise instead.
                # the user; raise so the streaming layer surfaces a clear error.
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
-            # Prevent the parent's resume payload from leaking into subagent
+            # Stop the parent's resume leaking into subagent interrupts via
-            # interrupts via langgraph's parent_scratchpad fallback.
+            # langgraph's parent_scratchpad fallback.
            drain_parent_null_resume(runtime)
            with ot.subagent_invoke_span(
                subagent_type=subagent_type, path=invoke_path
@ -827,10 +804,8 @@ def build_task_tool_with_parent_config(
        ] = None,
    ) -> str | Command:
        atask_start = time.perf_counter()
-        # Kill switch: when ops flips the spawn-paused flag for this
+        # Ops kill switch: short-circuit every task() call for this workspace
-        # workspace, every ``task(...)`` invocation (single- or batch-mode)
+        # so the orchestrator stops hammering downstream APIs.
        # short-circuits with a clear ToolMessage so the orchestrator can
        # tell the user what happened and stop hammering downstream APIs.
        if await is_spawn_paused(search_space_id):
            logger.warning(
                "[hitl_route] atask SPAWN_PAUSED: search_space_id=%s tool_call_id=%s",
@ -869,8 +844,8 @@ def build_task_tool_with_parent_config(
            subagent_type,
            runtime.tool_call_id,
        )
-        if subagent_type not in subagent_graphs:
+        if subagent_type not in subagent_names:
-            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
+            allowed_types = ", ".join([f"`{k}`" for k in subagent_names])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
@ -921,8 +896,8 @@ def build_task_tool_with_parent_config(
                    )
                expected = hitlrequest_action_count(pending_value)
                resume_value = fan_out_decisions_to_match(resume_value, expected)
-                # Prevent the parent's resume payload from leaking into subagent
+                # Stop the parent's resume leaking into subagent interrupts via
-                # interrupts via langgraph's parent_scratchpad fallback.
+                # langgraph's parent_scratchpad fallback.
                drain_parent_null_resume(runtime)
                with ot.subagent_invoke_span(
                    subagent_type=subagent_type, path=invoke_path
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/init.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/init.py
@ -0,0 +1,15 @@
 """Context-editing middleware: spill + clear-tool-uses passes (impl + builder)."""
 from .builder import build_context_editing_mw
 from .middleware import (
    ClearToolUsesEdit,
    SpillingContextEditingMiddleware,
    SpillToBackendEdit,
 )
 # Public API of this package: the names re-exported from ``.builder`` and
 # ``.middleware`` by the imports above.
 __all__ = [
     "ClearToolUsesEdit",
     "SpillToBackendEdit",
     "SpillingContextEditingMiddleware",
     "build_context_editing_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/builder.py
@ -7,18 +7,18 @@ from typing import Any
 from langchain_core.tools import BaseTool
-from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
+from app.agents.chat.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
    safe_exclude_tools,
 )
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import (
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
 from .middleware import (
    ClearToolUsesEdit,
    SpillingContextEditingMiddleware,
    SpillToBackendEdit,
 )
 from ..shared.flags import enabled
 def build_context_editing_mw(
    *,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/context_editing/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/dedup_hitl.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/dedup_hitl.py
@ -1,4 +1,4 @@
-"""Middleware that deduplicates HITL tool calls within a single LLM response.
+"""Drop duplicate HITL tool calls before execution.
 When the LLM emits multiple calls to the same HITL tool with the same
 primary argument (e.g. two ``delete_calendar_event("Doctor Appointment")``),
@ -9,72 +9,33 @@ the duplicate call is stripped from the AIMessage that gets checkpointed.
 That means it is also safe across LangGraph ``interrupt()`` boundaries:
 the removed call will never appear on graph resume.
-Dedup-key resolution order:
+Dedup-key resolution order (read from each tool's own ``metadata``):
-1. :class:`ToolDefinition.dedup_key` — callable provided by the registry
+1. ``tool.metadata["dedup_key"]`` — callable mapping the args dict to a
-   entry. This is the canonical mechanism.
+   stable signature string. This is the canonical mechanism.
-2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg name;
+2. ``tool.metadata["hitl_dedup_key"]`` — string naming a primary arg;
-   used by MCP / Composio tools whose schemas the registry doesn't see.
+   used by MCP / Composio tools that only expose a single key field.
 A tool with no resolver from either path simply opts out of dedup.
 """
 from __future__ import annotations
 import json
 import logging
-from collections.abc import Callable
+from collections.abc import Sequence
 from typing import Any
 from langchain.agents.middleware import AgentMiddleware, AgentState
 from langchain_core.tools import BaseTool
 from langgraph.runtime import Runtime
 from app.agents.chat.multi_agent_chat.shared.middleware.dedup_tool_calls import (
    DedupResolver,
    wrap_dedup_key_by_arg_name,
 )
 logger = logging.getLogger(__name__)
 # Resolver type — given the tool ``args`` dict returns a stable
 # string used to dedupe consecutive calls. ``None`` means no dedup.
 DedupResolver = Callable[[dict[str, Any]], str]
 def wrap_dedup_key_by_arg_name(arg_name: str) -> DedupResolver:
    """Build a :data:`DedupResolver` that keys on a single named argument.
    The returned callable reads ``arg_name`` from a tool call's args dict,
    converts the value with ``str()`` and lowercases it. A missing argument
    is treated as the empty string.
    Intended for registry entries that dedupe on one field, for example
    ``dedup_key=wrap_dedup_key_by_arg_name("subject")`` on the
    ``send_gmail_email`` tool definition.
    """
    def _key_for_call(args: dict[str, Any]) -> str:
        # Absent argument -> "" so the resolver always returns a string.
        raw_value = args.get(arg_name, "")
        return str(raw_value).lower()
    return _key_for_call
 def dedup_key_full_args(args: dict[str, Any]) -> str:
    """Resolver that collapses calls only when **every** argument is identical.
    Safe default for tools where no single field uniquely identifies a call
    (e.g. MCP tools whose first required field is a shared workspace id).
    The signature is the JSON encoding of ``args`` with sorted keys, so it
    does not depend on argument order. Values JSON cannot encode are
    stringified via ``default=str``. If JSON encoding fails, a ``repr`` of
    the sorted items is used instead; this function never raises for a dict
    input merely because its keys cannot be ordered against each other.
    Args:
        args: The tool call's argument mapping.
    Returns:
        A string signature; equal signatures mean "same call".
    """
    try:
        return json.dumps(args, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # TypeError: non-JSON key types or keys that cannot be sorted together.
        # ValueError: circular reference inside the arguments.
        if not isinstance(args, dict):
            return repr(args)
        try:
            ordered_items = sorted(args.items())
        except TypeError:
            # Mixed-type keys (e.g. int and str) cannot be compared directly;
            # order by the key's repr so the signature is still independent
            # of insertion order instead of crashing the dedup pass.
            ordered_items = sorted(args.items(), key=lambda item: repr(item[0]))
        return repr(ordered_items)
 # Backwards-compatible alias for code that imported the original
 # private name. New callers should use :func:`wrap_dedup_key_by_arg_name`.
 _wrap_string_key = wrap_dedup_key_by_arg_name
 class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    """Remove duplicate HITL tool calls from a single LLM response.
@ -84,9 +45,8 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    The dedup-resolver map is built from two sources, in priority order:
-    1. ``tool.metadata["dedup_key"]`` — callable provided by the registry's
+    1. ``tool.metadata["dedup_key"]`` — callable that receives the args dict
-       ``ToolDefinition.dedup_key``. Receives the args dict and returns
+       and returns a string signature. This is the canonical mechanism.
       a string signature. This is the canonical mechanism.
    2. ``tool.metadata["hitl_dedup_key"]`` — string with a primary arg
       name; primarily used by MCP / Composio tools.
    """
@ -162,3 +122,7 @@ class DedupHITLToolCallsMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        updated_msg = last_msg.model_copy(update={"tool_calls": deduped})
        return {"messages": [updated_msg]}
 def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
    """Create the HITL dedup middleware for the given agent tools.
    The tools are copied into a fresh list before being passed to
    :class:`DedupHITLToolCallsMiddleware` as ``agent_tools``.
    """
    agent_tools = list(tools)
    return DedupHITLToolCallsMiddleware(agent_tools=agent_tools)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/__init__.py
@ -0,0 +1,9 @@
 """Doom-loop middleware: detect repeated identical tool calls (impl + builder)."""
 from .builder import build_doom_loop_mw
 from .middleware import DoomLoopMiddleware
 __all__ = [
    "DoomLoopMiddleware",
    "build_doom_loop_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/builder.py
@ -2,10 +2,10 @@
 from __future__ import annotations
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import DoomLoopMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
-from ..shared.flags import enabled
+from .middleware import DoomLoopMiddleware
 def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/doom_loop/middleware.py
@ -16,7 +16,7 @@ This ships **OFF by default** until the frontend explicitly handles
 ``context.permission == "doom_loop"`` interrupts.
 Wire format: uses SurfSense's existing ``interrupt()`` payload shape
-(see ``app/agents/new_chat/tools/hitl.py``):
+(see ``app/agents/shared/tools/hitl.py``):
    {
        "type": "permission_ask",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/__init__.py
@ -0,0 +1,13 @@
 """End-of-turn KB persistence middleware (main-agent only)."""
 from .builder import build_kb_persistence_mw
 from .middleware import (
    KnowledgeBasePersistenceMiddleware,
    commit_staged_filesystem_state,
 )
 __all__ = [
    "KnowledgeBasePersistenceMiddleware",
    "build_kb_persistence_mw",
    "commit_staged_filesystem_state",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/builder.py
@ -2,8 +2,11 @@
 from __future__ import annotations
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
+
 from .middleware import (
    KnowledgeBasePersistenceMiddleware,
 )
 def build_kb_persistence_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
@ -1,33 +1,19 @@
 """End-of-turn persistence for the cloud-mode SurfSense filesystem.
-This middleware runs ``aafter_agent`` once per turn (cloud only). It commits
+Runs ``aafter_agent`` once per turn (cloud only), committing staged folder
-all staged folder creations, file moves, content writes/edits, file deletes
+creates, moves, writes/edits, and ``rm``/``rmdir`` to Postgres in one ordered
-(``rm``), and directory deletes (``rmdir``) to Postgres in a single ordered
+pass. Order matters: moves resolve before writes (so write-then-move lands at
-pass:
+the final path), and file deletes run before directory deletes (so a same-turn
 ``rm /a/x.md`` + ``rmdir /a`` works).
-1. Materialize ``staged_dirs`` into ``Folder`` rows.
+When ``flags.enable_action_log`` is on, each destructive op also snapshots a
-2. Apply ``pending_moves`` in order (chained moves resolved via
+``DocumentRevision`` / ``FolderRevision`` for revert. For ``rm``/``rmdir`` the
-   ``doc_id_by_path``).
+snapshot and DELETE share a SAVEPOINT, so a failed snapshot aborts the delete
-3. Normalize ``dirty_paths`` through ``pending_moves`` so write-then-move
+rather than making the data silently irreversible.
   sequences commit at the final path. Paths queued for ``rm`` this turn
   are dropped here so a write+rm sequence doesn't recreate the doc.
 4. Commit content writes / edits for ``/documents/*`` paths, skipping
   ``temp_*`` basenames.
 5. Apply ``pending_deletes`` (``rm``) — file deletes run BEFORE directory
   deletes so a same-turn ``rm /a/x.md`` + ``rmdir /a`` sequence works.
 6. Apply ``pending_dir_deletes`` (``rmdir``); re-verifies emptiness against
   the post-step-5 DB state.
-When ``flags.enable_action_log`` is on every destructive op also writes a
+The commit body is a free function (``commit_staged_filesystem_state``) so the
-``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
+stream-task fallback can run the identical routine when ``aafter_agent`` was
-originating ``AgentActionLog`` row via ``tool_call_id``. ``rm``/``rmdir``
+skipped (e.g. client disconnect).
 share a single ``SAVEPOINT`` with their snapshot — if the snapshot fails
 the DELETE rolls back and we surface the error rather than silently
 making the data irreversible.
 The commit body is exposed as a free function ``commit_staged_filesystem_state``
 so the optional stream-task fallback (``stream_new_chat.py``) can call the
 exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect).
 """
 from __future__ import annotations
@ -40,22 +26,28 @@ from typing import Any
 from fractional_indexing import generate_key_between
 from langchain.agents.middleware import AgentMiddleware, AgentState
 from langchain_core.callbacks import adispatch_custom_event, dispatch_custom_event
 from langgraph.config import get_config
 from langgraph.runtime import Runtime
 from sqlalchemy import delete, select, update
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession
-from app.agents.new_chat.feature_flags import get_flags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
+from app.agents.chat.multi_agent_chat.shared.receipts.receipt import (
-from app.agents.new_chat.path_resolver import (
+    Receipt,
    make_receipt,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
    SurfSenseFilesystemState,
 )
 from app.agents.chat.multi_agent_chat.shared.state.reducers import _CLEAR
 from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    parse_documents_path,
    safe_folder_segment,
    virtual_path_to_doc,
 )
 from app.agents.new_chat.state_reducers import _CLEAR
 from app.agents.shared.receipt import Receipt, make_receipt
 from app.db import (
    AgentActionLog,
    Chunk,
@ -211,11 +203,9 @@ async def _create_document(
        virtual_path,
        search_space_id,
    )
-    # Filesystem-parity invariant: the only thing that *must* be unique is
+    # Pre-check the path-derived unique_identifier_hash so a duplicate path
-    # the path. Two notes can legitimately share content (e.g. ``cp a b``).
+    # surfaces as a clean ValueError instead of an INSERT IntegrityError that
-    # Guard against the path-derived ``unique_identifier_hash`` constraint
+    # poisons the session. Content is intentionally not unique (cp a b).
    # so we surface a clean ValueError instead of letting the INSERT poison
    # the session with an IntegrityError.
    path_collision = await session.execute(
        select(Document.id).where(
            Document.search_space_id == search_space_id,
@ -227,13 +217,6 @@ async def _create_document(
            f"a document already exists at path '{virtual_path}' "
            "(unique_identifier_hash collision)"
        )
    # ``content_hash`` is intentionally NOT checked for uniqueness here.
    # In a real filesystem two files at different paths can hold identical
    # bytes, and the agent's ``write_file`` path needs that semantic to
    # support copy/duplicate operations. The hash remains useful as a
    # change-detection hint for connector indexers, which still consult it
    # via :func:`check_duplicate_document` but do so with a non-unique
    # lookup (``.first()``).
    content_hash = generate_content_hash(content, search_space_id)
    doc = Document(
        title=title,
@ -430,15 +413,9 @@ async def _mark_action_reversible(
 ) -> None:
    """Flip ``agent_action_log.reversible = TRUE`` for ``action_id``.
-    Best-effort: caller may invoke from inside a SAVEPOINT and treat
+    Pair with ``_dispatch_reversibility_update`` *after* the enclosing
-    failure as a soft demotion (snapshot persists, just no Revert button).
+    SAVEPOINT commits, so the UI never sees ``reversible=true`` for a row whose
-
+    update later rolls back.
    Callers should also call ``_dispatch_reversibility_update`` (defined
    below) AFTER the enclosing SAVEPOINT block exits successfully so the
    chat tool card can light up its Revert button without
    re-fetching ``GET /threads/.../actions``. Dispatching from inside the
    SAVEPOINT would risk emitting "reversible=true" for rows whose
    update gets rolled back if the surrounding destructive op fails.
    """
    if action_id is None:
        return
@ -450,22 +427,11 @@ async def _mark_action_reversible(
 async def _dispatch_reversibility_update(action_id: int | None) -> None:
-    """Best-effort dispatch of an ``action_log_updated`` custom event.
+    """Emit an ``action_log_updated`` SSE event so the Revert button lights up.
-    Surfaces the post-SAVEPOINT reversibility flip to the SSE layer so
+    Best-effort (failures swallowed; the REST actions endpoint is
-    the chat tool card can flip its Revert button live. Defensive:
+    authoritative). Inside :func:`commit_staged_filesystem_state` this is
-    failures are logged at debug level and swallowed; the
+    deferred until after the outer commit via ``deferred_dispatches``.
    REST endpoint ``GET /threads/.../actions`` is still authoritative.
    .. warning::
        Inside :func:`commit_staged_filesystem_state` we DEFER all
        dispatches until the outer ``session.commit()`` succeeds — see
        the ``deferred_dispatches`` queue in that function. Dispatching
        from inside a SAVEPOINT block while the outer transaction is
        still pending would emit ``reversible=true`` for rows whose
        snapshots get rolled back if the outer commit fails. Direct
        callers (e.g. the optional stream-task fallback) that own the
        full session lifetime can still call this helper inline.
    """
    if action_id is None:
        return
@ -484,12 +450,9 @@ async def _dispatch_reversibility_update(action_id: int | None) -> None:
 # ---------------------------------------------------------------------------
 # Snapshot helpers
 # ---------------------------------------------------------------------------
-#
+# Best-effort variants (write/edit/move/mkdir) swallow failures. Strict
-# Best-effort helpers swallow + log so a snapshot failure can never break
+# variants (rm/rmdir) share the destructive op's SAVEPOINT so a snapshot
-# the destructive op for non-destructive tools (write/edit/move/mkdir).
+# failure aborts the delete instead of making it silently irreversible.
 # Strict helpers run inside the SAME ``begin_nested()`` SAVEPOINT as the
 # destructive DELETE — failure aborts the savepoint and leaves the doc /
 # folder intact, so revertable ops never become irreversible silently.
 def _doc_revision_payload(
@ -699,15 +662,9 @@ async def commit_staged_filesystem_state(
 ) -> dict[str, Any] | None:
    """Commit all staged filesystem changes; return the state delta for reducers.
-    Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent`
+    Shared between :class:`KnowledgeBasePersistenceMiddleware.aafter_agent` and
-    and the optional stream-task fallback.
+    the stream-task fallback. See the module docstring for ordering and the
-
+    action-log snapshot/revert semantics.
    When ``flags.enable_action_log`` is on every destructive op also writes
    a ``DocumentRevision`` / ``FolderRevision`` snapshot bound to the
    originating ``AgentActionLog`` row via ``tool_call_id``. Snapshot
    durability is best-effort for non-destructive ops and STRICT for
    ``rm``/``rmdir`` (snapshot + DELETE share a SAVEPOINT — snapshot
    failure aborts the delete).
    """
    if filesystem_mode != FilesystemMode.CLOUD:
        return None
@ -766,8 +723,7 @@ async def commit_staged_filesystem_state(
    flags = get_flags()
    snapshot_enabled = flags.enable_action_log
-    # De-duplicate pending deletes per-path while preserving the latest
+    # De-dup deletes per-path, keeping the latest tool_call_id (likeliest revert).
    # tool_call_id (the one the user is most likely to revert via the UI).
    file_delete_paths: dict[str, str] = {}
    for entry in pending_deletes:
        if not isinstance(entry, dict):
@ -791,22 +747,14 @@ async def commit_staged_filesystem_state(
    applied_moves: list[dict[str, Any]] = []
    doc_id_path_tombstones: dict[str, int | None] = {}
    tree_changed = False
-    # Reversibility-flip dispatches are deferred until AFTER the outer
+    # Reversibility-flip dispatches are drained only after the outer commit
-    # ``session.commit()`` succeeds. Dispatching from inside the
+    # succeeds (and abandoned on rollback), so the UI never sees reversible=true
-    # SAVEPOINT chain while the outer transaction is still pending
+    # for a snapshot that didn't durably land.
    # would emit ``reversible=true`` for rows whose snapshots get rolled
    # back if the final commit raises. Snapshot helpers append on
    # success; we drain this list after commit and silently abandon it
    # on rollback so the UI stays consistent with durable state.
    deferred_dispatches: list[int] = []
    try:
        async with shielded_async_session() as session:
-            # ------------------------------------------------------------------
+            # Resolve all action-id bindings in one SELECT per turn, not per op.
            # Resolve action-id bindings up front. One SELECT per turn for all
            # tool_call_ids, NOT one per op — important because a turn that
            # touches 50 paths would otherwise issue 50 lookups.
            # ------------------------------------------------------------------
            action_id_by_call: dict[str, int] = {}
            if snapshot_enabled and thread_id is not None:
                tool_call_ids: set[str] = set()
@ -839,10 +787,7 @@ async def commit_staged_filesystem_state(
                next(iter(action_id_by_call), None) if action_id_by_call else None
            )
-            # ------------------------------------------------------------------
+            # 1. staged_dirs -> Folder rows (snapshot post-flush for the FK).
            # 1. staged_dirs -> Folder rows. Snapshot post-flush so the new
            # folder_id is available for the FK.
            # ------------------------------------------------------------------
            for folder_path in staged_dirs:
                if not isinstance(folder_path, str):
                    continue
@ -863,7 +808,6 @@ async def commit_staged_filesystem_state(
                    tcid = staged_dir_tool_calls.get(folder_path)
                    action_id = _action_id_for(tcid)
                    if action_id is not None:
                        # Re-read the folder for the snapshot.
                        result = await session.execute(
                            select(Folder).where(Folder.id == folder_id)
                        )
@ -878,16 +822,13 @@ async def commit_staged_filesystem_state(
                                deferred_dispatches=deferred_dispatches,
                            )
-            # ------------------------------------------------------------------
+            # 2. pending_moves (snapshot pre-move for in-place restore on revert).
            # 2. pending_moves. Snapshot pre-move (in-place restore on revert).
            # ------------------------------------------------------------------
            for move in pending_moves:
                source = str(move.get("source") or "")
                if snapshot_enabled and source:
                    tcid = str(move.get("tool_call_id") or "")
                    action_id = _action_id_for(tcid)
                    if action_id is not None:
                        # Resolve the doc to snapshot BEFORE we mutate it.
                        doc_id_pre = doc_id_by_path.get(source)
                        document_pre: Document | None = None
                        if doc_id_pre is not None:
@ -937,10 +878,8 @@ async def commit_staged_filesystem_state(
                    path = move_alias[path]
                return path
-            # ------------------------------------------------------------------
+            # 3. dirty_paths -> writes/edits. Paths queued for rm this turn are
-            # 3. dirty_paths -> writes/edits. Skip any path queued for ``rm``
+            # skipped so a write+rm sequence doesn't recreate the doc.
            # this turn so a write+rm sequence doesn't recreate the doc.
            # ------------------------------------------------------------------
            kb_dirty_seen: set[str] = set()
            kb_dirty: list[str] = []
            kb_dirty_origin: dict[str, str] = {}
@ -969,9 +908,7 @@ async def commit_staged_filesystem_state(
                    continue
                content = "\n".join(file_data.get("content") or [])
                doc_id = doc_id_by_path.get(path)
-                # Path ↔ tool_call_id binding: the dirty_paths list dedupes via
+                # Look up tool_call_id by final path or its pre-rename origin.
                # _add_unique_reducer, so we look up the latest tool_call_id by
                # path (or by the un-renamed origin).
                origin = kb_dirty_origin.get(path, path)
                tcid = dirty_path_tool_calls.get(path) or dirty_path_tool_calls.get(
                    origin
@ -979,12 +916,9 @@ async def commit_staged_filesystem_state(
                action_id = _action_id_for(tcid)
                if doc_id is None:
-                    # The in-memory ``doc_id_by_path`` is per-thread and starts
+                    # doc_id_by_path is per-thread and empty in a new chat, so a
-                    # empty in every new chat. If the agent writes to a path
+                    # write to a path already in the DB must update in place, not
-                    # that already exists in the DB (e.g. a previous chat's
+                    # INSERT (which would hit the path-derived unique hash).
                    # ``notes.md``), we must NOT try to INSERT — it would hit
                    # ``unique_identifier_hash`` (path-derived). Look up the
                    # existing doc and update it in place instead.
                    existing = await virtual_path_to_doc(
                        session,
                        search_space_id=search_space_id,
@ -1033,12 +967,9 @@ async def commit_staged_filesystem_state(
                            }
                        )
                else:
-                    # Fresh create. Wrap each create in a SAVEPOINT so a
+                    # Fresh create, wrapped in a SAVEPOINT so a residual
-                    # residual ``IntegrityError`` (e.g. a deployment that
+                    # IntegrityError (e.g. pre-migration-133 content_hash UNIQUE)
-                    # hasn't run migration 133 yet, where
+                    # rolls back only this create, not the whole turn.
                    # ``documents.content_hash`` still carries its legacy
                    # global UNIQUE constraint) rolls back only this one
                    # create instead of poisoning the whole turn.
                    placeholder_revision_id: int | None = None
                    if snapshot_enabled and action_id is not None:
                        placeholder_revision_id = await _snapshot_document_pre_create(
@ -1061,8 +992,7 @@ async def commit_staged_filesystem_state(
                        logger.warning(
                            "kb_persistence: skipping %s create: %s", path, exc
                        )
-                        # Roll back the placeholder revision since the create
+                        # Create never happened; drop its placeholder revision.
                        # never happened.
                        if placeholder_revision_id is not None:
                            await session.execute(
                                delete(DocumentRevision).where(
@ -1109,19 +1039,14 @@ async def commit_staged_filesystem_state(
                    )
                    tree_changed = True
-            # ------------------------------------------------------------------
+            # 4. pending_deletes -> rm. Strict: snapshot + DELETE share a
-            # 4. pending_deletes -> ``rm``. STRICT durability: snapshot + DELETE
+            # SAVEPOINT, so a failed snapshot rolls the delete back too.
            # share a SAVEPOINT. If the snapshot insert fails, the DELETE
            # rolls back too and we surface the error rather than silently
            # making the data irreversible.
            # ------------------------------------------------------------------
            for raw_path, tcid in file_delete_paths.items():
                final = _final_path(raw_path)
                if not final.startswith(DOCUMENTS_ROOT + "/"):
                    continue
                action_id = _action_id_for(tcid)
                # Resolve the doc.
                doc_id_for_delete = doc_id_by_path.get(final)
                document_to_delete: Document | None = None
                if doc_id_for_delete is not None:
@ -1150,7 +1075,6 @@ async def commit_staged_filesystem_state(
                try:
                    async with session.begin_nested():
                        # Strict: snapshot first; failure aborts the delete.
                        if snapshot_enabled and action_id is not None:
                            chunks = await _load_chunks_for_snapshot(
                                session, doc_id=doc_pk
@ -1179,10 +1103,7 @@ async def commit_staged_filesystem_state(
                    )
                    continue
-                # B1 — SAVEPOINT released. Defer the reversibility-flip
+                # Defer the reversibility flip until after the outer commit.
                # dispatch until AFTER the outer commit succeeds so we
                # never tell the UI a row is reversible if its snapshot
                # gets rolled back.
                if snapshot_enabled and action_id is not None:
                    deferred_dispatches.append(int(action_id))
@ -1201,11 +1122,8 @@ async def commit_staged_filesystem_state(
                )
                tree_changed = True
-            # ------------------------------------------------------------------
+            # 5. pending_dir_deletes -> rmdir. Strict, and re-checks emptiness
-            # 5. pending_dir_deletes -> ``rmdir``. STRICT durability + final
+            # against post-step-4 DB state.
            # emptiness check (after step 4's deletes have run, an "empty
            # mid-turn" directory really IS empty in DB now).
            # ------------------------------------------------------------------
            for raw_path, tcid in dir_delete_paths.items():
                final = _final_path(raw_path)
                if not final.startswith(DOCUMENTS_ROOT + "/"):
@ -1226,7 +1144,6 @@ async def commit_staged_filesystem_state(
                    )
                    continue
                # Re-check emptiness against in-DB state.
                docs_in_folder = await session.execute(
                    select(Document.id)
                    .where(Document.folder_id == folder_id)
@ -1291,10 +1208,7 @@ async def commit_staged_filesystem_state(
                    )
                    continue
-                # B1 — SAVEPOINT released. Defer the reversibility-flip
+                # Defer the reversibility flip until after the outer commit.
                # dispatch until AFTER the outer commit succeeds so we
                # never tell the UI a row is reversible if its snapshot
                # gets rolled back.
                if snapshot_enabled and action_id is not None:
                    deferred_dispatches.append(int(action_id))
@ -1314,18 +1228,13 @@ async def commit_staged_filesystem_state(
        logger.exception(
            "kb_persistence: commit failed (search_space=%s)", search_space_id
        )
-        # Outer commit raised — every SAVEPOINT-released change above
+        # Outer commit raised: everything above rolled back, so drop the
-        # (snapshots + reversibility flips) is now rolled back. Drop
+        # deferred dispatches.
        # the deferred SSE dispatches so the UI stays consistent with
        # durable state.
        deferred_dispatches.clear()
        return None
-    # Outer commit succeeded; flush deferred reversibility-flip
+    # Commit succeeded; flush deferred reversibility flips (de-duped, since
-    # dispatches now so the chat tool card can light up its Revert
+    # write-then-rm in one turn appends an id per snapshot site).
    # button without re-fetching ``GET /threads/.../actions``. De-dup
    # to avoid emitting the same id twice (e.g. write-then-rm in the
    # same turn dispatches once for each snapshot site).
    if deferred_dispatches and dispatch_events:
        for action_id in dict.fromkeys(deferred_dispatches):
            try:
@ -1371,9 +1280,8 @@ async def commit_staged_filesystem_state(
        p for p in files if isinstance(p, str) and _basename(p).startswith(_TEMP_PREFIX)
    ]
-    # Tombstone every committed-delete path so a stale ``state["files"]`` entry
+    # Tombstone committed-delete paths so a stale state["files"] entry can't
-    # (which als_info would otherwise interpret as content) cannot survive into
+    # survive into the next turn and make a now-empty folder look non-empty.
    # the next turn and make a now-empty folder look non-empty.
    deleted_file_paths = [
        str(payload.get("virtualPath") or "")
        for payload in committed_deletes
@ -1394,11 +1302,8 @@ async def commit_staged_filesystem_state(
        "dirty_path_tool_calls": {_CLEAR: True},
    }
-    # Emit one Receipt per committed mutation, folded into ``state['receipts']``
+    # One Receipt per committed mutation: ground truth (post-savepoint) for the
-    # via ``_list_append_reducer``. The receipts surface what actually committed
+    # orchestrator's <verification> teaching. KB writes have no public URL.
    # (post-savepoint) rather than what the LLM intended; the orchestrator uses
    # them as ground truth in the ``<verification>`` teaching. KB writes do not
    # have public verifiable URLs, so ``verifiable_url`` stays unset.
    receipts: list[Receipt] = []
    def _kb_receipt(
@ -1439,8 +1344,6 @@ async def commit_staged_filesystem_state(
            external_id=payload.get("id"),
        )
    for payload in applied_moves:
        # ``applied_moves`` rows carry the destination ``virtualPath`` because
        # the move has already landed in the DB by the time we reach this code.
        path = str(payload.get("virtualPath") or "")
        _kb_receipt(
            type="file",
@ -1480,9 +1383,7 @@ async def commit_staged_filesystem_state(
    if tree_changed:
        delta["tree_version"] = int(state_dict.get("tree_version") or 0) + 1
-    # Avoid 'unused' lint when turn_id_for_revision was only useful for
+    _ = turn_id_for_revision  # diagnostic-only; silence unused lint
    # diagnostic purposes inside the SAVEPOINT chain above.
    _ = turn_id_for_revision
    logger.info(
        "kb_persistence: commit (search_space=%s) creates=%d updates=%d "
@ -1536,9 +1437,33 @@ class KnowledgeBasePersistenceMiddleware(AgentMiddleware):  # type: ignore[type-
            search_space_id=self.search_space_id,
            created_by_id=self.created_by_id,
            filesystem_mode=self.filesystem_mode,
-            thread_id=self.thread_id,
+            thread_id=self._resolve_thread_id(),
        )
    def _resolve_thread_id(self) -> int | None:
        """Return the thread id of the run this middleware is executing in.

        ``aafter_agent`` is only handed a ``Runtime`` (which does NOT carry the
        config), so ``configurable.thread_id`` is read from the active
        ``RunnableConfig`` via :func:`langgraph.config.get_config` — the same
        node-context pattern used by ``BusyMutexMiddleware``. Looking the id up
        at call time (instead of trusting the value captured at ``__init__``)
        lets one cached compiled graph commit staged writes against the
        correct thread across many chats.

        Returns:
            The live thread id as an ``int``; ``None`` when the live value is
            present but cannot be converted to ``int``; otherwise the
            constructor-time ``self.thread_id`` (legacy/test runtimes where no
            config, or no ``thread_id`` entry, is available).
        """
        try:
            live_config = get_config()
        except Exception:
            # No usable config could be obtained: use the constructor value.
            return self.thread_id

        if not isinstance(live_config, dict):
            return self.thread_id

        configurable = live_config.get("configurable") or {}
        raw_thread_id = configurable.get("thread_id")
        if raw_thread_id is None:
            return self.thread_id

        try:
            return int(raw_thread_id)
        except (TypeError, ValueError):
            # A live id exists but is malformed; do not substitute the
            # constructor value for it.
            return None
 __all__ = [
    "KnowledgeBasePersistenceMiddleware",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
@ -4,8 +4,10 @@ from __future__ import annotations
 from langchain_core.language_models import BaseChatModel
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
    KnowledgePriorityMiddleware,
 )
 from app.services.llm_service import get_planner_llm
@ -17,7 +19,16 @@ def build_knowledge_priority_mw(
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    preinjection_enabled: bool = True,
 ) -> KnowledgePriorityMiddleware:
    """Build the KB priority middleware.
    When ``preinjection_enabled`` is False (the lazy default), the middleware
    runs in mentions-only mode: it skips the expensive planner LLM + embedding
    + hybrid search and only surfaces explicit @-mentions. The main agent is
    expected to pull relevant KB content on demand via the
    ``search_knowledge_base`` tool instead.
    """
    return KnowledgePriorityMiddleware(
        llm=llm,
        planner_llm=get_planner_llm(),
@ -27,4 +38,5 @@ def build_knowledge_priority_mw(
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        inject_system_message=False,
        mentions_only=not preinjection_enabled,
    )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/__init__.py
@ -0,0 +1,9 @@
 """Knowledge-tree middleware: <workspace_tree> injection, cloud only (impl + builder)."""
 from .builder import build_knowledge_tree_mw
 from .middleware import KnowledgeTreeMiddleware
 __all__ = [
    "KnowledgeTreeMiddleware",
    "build_knowledge_tree_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/builder.py
@ -4,8 +4,9 @@ from __future__ import annotations
 from langchain_core.language_models import BaseChatModel
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
+
 from .middleware import KnowledgeTreeMiddleware
 def build_knowledge_tree_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_tree/middleware.py
@ -33,9 +33,11 @@ from langchain_core.messages import SystemMessage
 from langgraph.runtime import Runtime
 from sqlalchemy import select
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState
+from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
-from app.agents.new_chat.path_resolver import (
+    SurfSenseFilesystemState,
 )
 from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    PathIndex,
    build_path_index,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/__init__.py
@ -0,0 +1,5 @@
 """User/team memory injection middleware (main-agent only)."""
 from .builder import build_memory_mw
 __all__ = ["build_memory_mw"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/builder.py
@ -2,9 +2,10 @@
 from __future__ import annotations
 from app.agents.new_chat.middleware import MemoryInjectionMiddleware
 from app.db import ChatVisibility
 from .middleware import MemoryInjectionMiddleware
 def build_memory_mw(
    *,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/memory/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/__init__.py
@ -0,0 +1,9 @@
 """Noop-injection middleware: provider-compat _noop tool (impl + builder)."""
 from .builder import build_noop_injection_mw
 from .middleware import NoopInjectionMiddleware
 __all__ = [
    "NoopInjectionMiddleware",
    "build_noop_injection_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/builder.py
@ -2,10 +2,10 @@
 from __future__ import annotations
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import NoopInjectionMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
-from ..shared.flags import enabled
+from .middleware import NoopInjectionMiddleware
 def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/noop_injection/middleware.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/__init__.py
@ -0,0 +1,9 @@
 """OTel-span middleware: spans on model and tool calls (impl + builder)."""
 from .builder import build_otel_mw
 from .middleware import OtelSpanMiddleware
 __all__ = [
    "OtelSpanMiddleware",
    "build_otel_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/builder.py
@ -2,10 +2,10 @@
 from __future__ import annotations
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import OtelSpanMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
-from ..shared.flags import enabled
+from .middleware import OtelSpanMiddleware
 def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/otel_span/middleware.py
@ -24,6 +24,7 @@ from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import AIMessage, ToolMessage
 from app.observability import metrics as ot_metrics, otel as ot
 from app.utils.perf import get_perf_logger
 if TYPE_CHECKING:  # pragma: no cover — type-only
    from langchain.agents.middleware.types import (
@ -34,6 +35,7 @@ if TYPE_CHECKING:  # pragma: no cover — type-only
    from langgraph.types import Command
 logger = logging.getLogger(__name__)
 _perf_log = get_perf_logger()
 class OtelSpanMiddleware(AgentMiddleware):
@ -60,7 +62,23 @@ class OtelSpanMiddleware(AgentMiddleware):
        handler: Callable[[ModelRequest], Awaitable[ModelResponse | AIMessage | Any]],
    ) -> ModelResponse | AIMessage | Any:
        if not ot.is_enabled():
-            return await handler(request)
+            # Always emit a [PERF] line for the model step even when OTel is
            # disabled. This isolates provider/model latency from the agent's
            # pre-flight (before_agent KB-priority/memory/tree) work, which is
            # the usual culprit when the multi-agent path feels slow to start.
            # ``perf_counter`` at entry doubles as the "before_agent finished /
            # model call started" marker on the first step of a turn.
            model_id, _provider = _resolve_model_attrs(request)
            _t0 = time.perf_counter()
            _perf_log.info("[model_call] start model=%s", model_id)
            try:
                return await handler(request)
            finally:
                _perf_log.info(
                    "[model_call] done model=%s elapsed=%.3fs",
                    model_id,
                    time.perf_counter() - _t0,
                )
        model_id, provider = _resolve_model_attrs(request)
        t0 = time.perf_counter()
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/plugins.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/plugins.py
@ -7,15 +7,15 @@ from typing import Any
 from langchain_core.language_models import BaseChatModel
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.plugin_loader import (
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
 from app.db import ChatVisibility
 from ..plugins.loader import (
    PluginContext,
    load_allowed_plugin_names_from_env,
    load_plugin_middlewares,
 )
 from app.db import ChatVisibility
 from ..shared.flags import enabled
 def build_plugin_middlewares(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/skills.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/skills.py
@ -6,14 +6,11 @@ import logging
 from deepagents.middleware.skills import SkillsMiddleware
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.filesystem_selection import FilesystemMode
+from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.new_chat.middleware import (
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
    build_skills_backend_factory,
    default_skills_sources,
 )
-from ..shared.flags import enabled
+from ..skills.backends import build_skills_backend_factory, default_skills_sources
 def build_skills_mw(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@ -0,0 +1,314 @@
 """Main-agent middleware list assembly: one line per slot.
 The main agent is a pure router — filesystem reads/writes are owned by the
 ``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
 here only renders KB context (workspace tree + priority docs), projects it
 into system messages, and commits any subagent-side staged writes at end of
 turn (cloud mode).
 """
 from __future__ import annotations
 import logging
 import time
 from collections.abc import Sequence
 from typing import Any, cast
 from deepagents import SubAgent
 from deepagents.backends import StateBackend
 from langchain.agents import create_agent
 from langchain_core.language_models import BaseChatModel
 from langchain_core.runnables import Runnable
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
 from app.agents.chat.multi_agent_chat.main_agent.middleware.memory import (
    build_memory_mw,
 )
 from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
 from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
 from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
    build_anthropic_cache_mw,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
    build_compaction_mw,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
    build_kb_context_projection_mw,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
    build_patch_tool_calls_mw,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.resilience import (
    build_resilience_middlewares,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.todos import build_todos_mw
 from app.agents.chat.multi_agent_chat.shared.permissions import (
    build_permission_mw,
 )
 from app.agents.chat.multi_agent_chat.subagents import (
    build_subagents,
    get_subagents_to_exclude,
 )
 from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.agent import (
    NAME as KB_WRITE_NAME,
    READONLY_NAME as KB_READONLY_NAME,
    build_readonly_subagent as build_kb_readonly_subagent,
    build_subagent as build_kb_write_subagent,
 )
 from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.ask_knowledge_base_tool import (
    build_ask_knowledge_base_tool,
 )
 from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.prompts import (
    load_description as load_kb_write_description,
 )
 from app.agents.chat.multi_agent_chat.subagents.middleware_stack import (
    build_subagent_middleware_stack,
 )
 from app.agents.chat.multi_agent_chat.subagents.shared.spec import (
    SURF_LAZY_SPEC_FACTORY_KEY,
 )
 from app.db import ChatVisibility
 from app.utils.perf import get_perf_logger
 from .action_log import build_action_log_mw
 from .anonymous_document import build_anonymous_doc_mw
 from .busy_mutex import build_busy_mutex_mw
 from .checkpointed_subagent_middleware import (
    SurfSenseCheckpointedSubAgentMiddleware,
 )
 from .checkpointed_subagent_middleware.task_description import (
    TASK_TOOL_DESCRIPTION,
 )
 from .context_editing import build_context_editing_mw
 from .dedup_hitl import build_dedup_hitl_mw
 from .doom_loop import build_doom_loop_mw
 from .kb_persistence import build_kb_persistence_mw
 from .knowledge_priority import build_knowledge_priority_mw
 from .knowledge_tree import build_knowledge_tree_mw
 from .noop_injection import build_noop_injection_mw
 from .otel_span import build_otel_mw
 from .plugins import build_plugin_middlewares
 from .skills import build_skills_mw
 from .tool_call_repair import build_repair_mw
 _perf_log = get_perf_logger()
 def build_main_agent_deepagent_middleware(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    subagent_dependencies: dict[str, Any],
    checkpointer: Checkpointer,
    mcp_tools_by_agent: dict[str, list[BaseTool]] | None = None,
    disabled_tools: list[str] | None = None,
 ) -> list[Any]:
    """Ordered middleware for ``create_agent`` (None entries already stripped).

    Steps, in order:

    1. Build the resilience middlewares, the memory middleware and the
       middleware stack shared by all subagents.
    2. Register the subagents: everything returned by ``build_subagents``
       (minus connector-gated exclusions and the write knowledge_base agent),
       plus a lazy descriptor for the write knowledge_base agent whose spec is
       only built by ``_build_kb_write_spec`` when it is first requested.
    3. Assemble the main-agent stack, one builder call per slot, and drop the
       slots whose builder returned ``None``.
    4. Emit a ``[stack_build]`` perf line with the timing of each phase.

    Args:
        llm: Model handed to the subagent builders and to the middleware
            builders that take one.
        tools: Main-agent tools; forwarded to the context-editing, repair and
            dedup-HITL builders.
        backend_resolver: Injected into ``subagent_dependencies`` and passed
            to the context-editing builder.
        filesystem_mode: Injected into ``subagent_dependencies`` and passed to
            the filesystem-aware middleware builders.
        search_space_id: Search space the middleware is scoped to.
        user_id: Forwarded to the memory, KB-persistence, action-log and
            plugin builders.
        thread_id: Forwarded to the KB-persistence and action-log builders.
        visibility: Forwarded to the memory and plugin builders.
        anon_session_id: Forwarded to the anonymous-document builder.
        available_connectors: Used to compute the excluded subagents and
            forwarded to the knowledge-priority builder.
        available_document_types: Forwarded to the knowledge-priority builder.
        mentioned_document_ids: Forwarded to the knowledge-priority builder.
        max_input_tokens: Forwarded to the context-editing builder.
        flags: Feature flags consulted by the flag-gated builders.
        subagent_dependencies: Base dependency mapping for subagents. A copy
            extended with ``backend_resolver``, ``filesystem_mode`` and
            ``flags`` is used; the caller's dict is not mutated.
        checkpointer: Used for the lazily compiled read-only KB graph and for
            the checkpointed subagent middleware.
        mcp_tools_by_agent: Forwarded to ``build_subagents``; ``None`` is
            treated as an empty mapping.
        disabled_tools: Forwarded to ``build_subagents`` and used to filter
            the tools of the lazily built write knowledge_base spec.

    Returns:
        The middleware instances in stack order, without ``None`` entries.
    """
    stack_build_start = time.perf_counter()
    resilience = build_resilience_middlewares(flags)
    memory_mw = build_memory_mw(
        user_id=user_id,
        search_space_id=search_space_id,
        visibility=visibility,
    )
    # Rebind to a new dict so the caller's mapping is left untouched.
    subagent_dependencies = {
        **subagent_dependencies,
        "backend_resolver": backend_resolver,
        "filesystem_mode": filesystem_mode,
        "flags": flags,
    }
    shared_mw_start = time.perf_counter()
    shared_subagent_middleware = build_subagent_middleware_stack(
        resilience=resilience,
        flags=flags,
    )
    shared_mw_elapsed = time.perf_counter() - shared_mw_start
    def _compile_kb_readonly() -> Runnable:
        """Build *and* compile the read-only KB graph on first ``ask_knowledge_base`` use.

        Both the spec build (``build_kb_readonly_subagent`` — middleware +
        tool-schema construction, ~the same cost as one regular subagent) and
        the ``create_agent`` compile are deferred here (memoized by
        ``build_ask_knowledge_base_tool``) so neither is paid on the cold
        agent-build / TTFT path; most first turns never call a subagent.
        """
        build_start = time.perf_counter()
        kb_readonly_spec = build_kb_readonly_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        runnable = create_agent(
            llm,
            system_prompt=kb_readonly_spec["system_prompt"],
            tools=kb_readonly_spec["tools"],
            middleware=kb_readonly_spec["middleware"],
            name=KB_READONLY_NAME,
            checkpointer=checkpointer,
        )
        _perf_log.info(
            "[subagent_compile_lazy] name=%s (spec+compile) in %.3fs",
            KB_READONLY_NAME,
            time.perf_counter() - build_start,
        )
        return runnable
    ask_kb_tool = build_ask_knowledge_base_tool(_compile_kb_readonly)
    def _build_kb_write_spec() -> dict[str, Any]:
        """Build the *write* knowledge_base subagent spec on first ``task`` use.

        The KB filesystem middleware builds ~13 tool schemas at ~150ms each
        (~2s total), all of which used to land on the cold agent-build / TTFT
        path even though ``task("knowledge_base")`` is essentially never the
        first thing a turn does. Deferring the whole spec build here (memoized
        by the checkpointed subagent middleware) moves that cost to the first
        actual KB-write delegation. Captures the same ``subagent_dependencies``
        the eager build would have used, so cross-thread cache behaviour is
        unchanged.
        """
        spec = build_kb_write_subagent(
            dependencies=subagent_dependencies,
            model=llm,
            middleware_stack=shared_subagent_middleware,
        ).spec
        if disabled_tools:
            disabled = frozenset(disabled_tools)
            # Named ``spec_tools`` so it cannot be mistaken for the enclosing
            # function's ``tools`` parameter (the main-agent tool list).
            spec_tools = spec.get("tools")  # type: ignore[typeddict-item]
            if isinstance(spec_tools, list):
                spec["tools"] = [  # type: ignore[typeddict-unknown-key]
                    t for t in spec_tools if getattr(t, "name", None) not in disabled
                ]
        return cast(dict[str, Any], spec)
    subagents_start = time.perf_counter()
    # The write knowledge_base subagent is excluded from the eager build and
    # registered as a lazy descriptor (name + description cheap; spec built on
    # first ``task("knowledge_base")`` use) — see ``_build_kb_write_spec``.
    exclude_names = [*get_subagents_to_exclude(available_connectors), KB_WRITE_NAME]
    subagents: list[SubAgent] = build_subagents(
        dependencies=subagent_dependencies,
        model=llm,
        middleware_stack=shared_subagent_middleware,
        mcp_tools_by_agent=mcp_tools_by_agent or {},
        exclude=exclude_names,
        disabled_tools=disabled_tools,
        ask_kb_tool=ask_kb_tool,
    )
    kb_write_descriptor = cast(
        SubAgent,
        {
            "name": KB_WRITE_NAME,
            "description": load_kb_write_description(),
            SURF_LAZY_SPEC_FACTORY_KEY: _build_kb_write_spec,
        },
    )
    subagents.append(kb_write_descriptor)
    subagents_elapsed = time.perf_counter() - subagents_start
    # Log through a module-scoped logger: the module-level ``logging.debug``
    # targets the root logger and implicitly calls ``logging.basicConfig()``
    # when the root logger has no handlers configured.
    logging.getLogger(__name__).debug(
        "Subagents registry: %s", [s["name"] for s in subagents]
    )
    assembly_start = time.perf_counter()
    # Slot order is significant; builders that are disabled return ``None``
    # and are filtered out below.
    stack: list[Any] = [
        build_busy_mutex_mw(flags),
        build_otel_mw(flags),
        build_todos_mw(system_prompt=""),
        memory_mw,
        build_anonymous_doc_mw(
            filesystem_mode=filesystem_mode, anon_session_id=anon_session_id
        ),
        build_knowledge_tree_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            llm=llm,
        ),
        build_knowledge_priority_mw(
            llm=llm,
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
            preinjection_enabled=flags.enable_kb_priority_preinjection,
        ),
        build_kb_context_projection_mw(),
        build_kb_persistence_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
        ),
        build_skills_mw(
            flags=flags,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
        ),
        SurfSenseCheckpointedSubAgentMiddleware(
            checkpointer=checkpointer,
            backend=StateBackend,
            subagents=subagents,
            system_prompt=None,
            task_description=TASK_TOOL_DESCRIPTION,
            search_space_id=search_space_id,
        ),
        resilience.model_call_limit,
        resilience.tool_call_limit,
        build_context_editing_mw(
            flags=flags,
            max_input_tokens=max_input_tokens,
            tools=tools,
            backend_resolver=backend_resolver,
        ),
        build_compaction_mw(llm),
        build_noop_injection_mw(flags),
        resilience.retry,
        resilience.fallback,
        build_repair_mw(flags=flags, tools=tools),
        build_permission_mw(flags=flags),
        build_doom_loop_mw(flags),
        build_action_log_mw(
            flags=flags,
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
        ),
        build_patch_tool_calls_mw(),
        build_dedup_hitl_mw(tools),
        *build_plugin_middlewares(
            flags=flags,
            search_space_id=search_space_id,
            user_id=user_id,
            visibility=visibility,
            llm=llm,
        ),
        build_anthropic_cache_mw(),
    ]
    result = [m for m in stack if m is not None]
    assembly_elapsed = time.perf_counter() - assembly_start
    _perf_log.info(
        "[stack_build] total=%.3fs shared_subagent_mw=%.3fs "
        "build_subagents=%.3fs stack_assembly=%.3fs subagents=%d mw=%d "
        "(kb_readonly deferred to first ask_knowledge_base)",
        time.perf_counter() - stack_build_start,
        shared_mw_elapsed,
        subagents_elapsed,
        assembly_elapsed,
        len(subagents),
        len(result),
    )
    return result
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/__init__.py
@ -0,0 +1,9 @@
 """Tool-call-repair middleware: fix miscased/unknown tool names (impl + builder)."""
 from .builder import build_repair_mw
 from .middleware import ToolCallNameRepairMiddleware
 __all__ = [
    "ToolCallNameRepairMiddleware",
    "build_repair_mw",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/builder.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/builder.py
@ -6,10 +6,10 @@ from collections.abc import Sequence
 from langchain_core.tools import BaseTool
-from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
-from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware
+from app.agents.chat.multi_agent_chat.shared.middleware.flags import enabled
-from ..shared.flags import enabled
+from .middleware import ToolCallNameRepairMiddleware
 # deepagents-built-in tool names the repair pass treats as known.
 _DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/tool_call_repair/middleware.py
@ -34,8 +34,6 @@ from langchain.agents.middleware.types import (
 from langchain_core.messages import AIMessage
 from langgraph.runtime import Runtime
 from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME
 logger = logging.getLogger(__name__)
@ -120,6 +118,12 @@ class ToolCallNameRepairMiddleware(
                return call
        # Stage 2 — invalid fallback
        # Local import keeps the middleware module import-light and avoids any
        # tools <-> middleware import-order coupling at module scope.
        from app.agents.chat.multi_agent_chat.main_agent.tools.invalid_tool import (
            INVALID_TOOL_NAME,
        )
        if INVALID_TOOL_NAME in registered:
            original_args = call.get("args") or {}
            error_msg = (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/loader.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/loader.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/year_substituter.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/plugins/year_substituter.py
@ -17,7 +17,7 @@ Wire-up in ``pyproject.toml`` (illustrative; the in-repo plugin doesn't
 need this -- it's already on the import path)::
    [project.entry-points."surfsense.plugins"]
-    year_substituter = "app.agents.new_chat.plugins.year_substituter:make_middleware"
+    year_substituter = "app.agents.chat.multi_agent_chat.main_agent.plugins.year_substituter:make_middleware"
 """
 from __future__ import annotations
@ -34,7 +34,7 @@ if TYPE_CHECKING:  # pragma: no cover - type-only
    from langchain_core.messages import ToolMessage
    from langgraph.types import Command
-    from app.agents.new_chat.plugin_loader import PluginContext
+    from .loader import PluginContext
 logger = logging.getLogger(__name__)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache.py
@ -10,18 +10,18 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
-from app.agents.new_chat.agent_cache import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import AgentFeatureFlags
 from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
 from app.db import ChatVisibility
 from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
 from .agent_cache_store import (
    flags_signature,
    get_cache,
    stable_hash,
    system_prompt_hash,
    tools_signature,
 )
 from app.agents.new_chat.feature_flags import AgentFeatureFlags
 from app.agents.new_chat.filesystem_selection import FilesystemMode
 from app.db import ChatVisibility
 from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
 def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
@ -91,10 +91,18 @@ async def build_agent_with_cache(
    # Every per-request value any middleware closes over at __init__ must be in
    # the key, otherwise a hit will leak state across threads. Bump the schema
    # version when the component list changes shape.
    #
    # Cross-thread reuse: when enabled, ``thread_id`` is dropped from the key so
    # one compiled graph serves all of a user's (same space/config/visibility)
    # chats. This is only safe because ActionLog, KB-persistence, and the
    # deliverables tools now resolve the chat thread from the live
    # RunnableConfig instead of a constructor closure; the schema tag is bumped
    # so v2 (per-thread) entries are never confused with v3 (shared) ones.
    cross_thread = flags.enable_cross_thread_agent_cache
    cache_key = stable_hash(
-        "multi-agent-v2",
+        "multi-agent-v3" if cross_thread else "multi-agent-v2",
        config_id,
-        thread_id,
+        None if cross_thread else thread_id,
        user_id,
        search_space_id,
        visibility,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py
@ -67,13 +67,13 @@ from __future__ import annotations
 import asyncio
 import hashlib
 import logging
 import os
 import time
 from collections import OrderedDict
 from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from typing import Any
 from app.config import config
 from app.utils.perf import get_perf_logger
 logger = logging.getLogger(__name__)
@ -113,12 +113,11 @@ def tools_signature(
      MCP tools loaded for the user changes, gating rules flip, etc.).
    * The available connectors / document types for the search space
      change (new connector added, last connector removed, new document
-      type indexed). Because :func:`get_connector_gated_tools` derives
+      type indexed). Connector gating derives disabled tools from
-      ``modified_disabled_tools`` from ``available_connectors``, the
+      ``available_connectors``, so the tool surface is technically already
-      tool surface is technically already covered — but we hash the
+      covered — but we hash the connector list separately so an empty-list
-      connector list separately so an empty-list "no tools changed"
+      "no tools changed" situation still rotates the key when, say, the user
-      situation still rotates the key when, say, the user re-adds a
+      re-adds a connector that gates a tool we were already not exposing.
-      connector that gates a tool we were already not exposing.
    Stays stable across:
@ -329,8 +328,8 @@ def _short(key: str, n: int = 16) -> str:
 # Module-level singleton
 # ---------------------------------------------------------------------------
-_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
+_DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE
-_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
+_DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS
 _cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/connector_searchable_types.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/connector_searchable_types.py
@ -0,0 +1,100 @@
 """Map configured connectors to the searchable document/connector types.
 This is agent-agnostic infrastructure shared by every agent factory (single-
 and multi-agent). It translates the connectors a search space has enabled into
 the set of searchable type strings that pre-search middleware and ``web_search``
 understand, and always layers in the document types that exist independently of
 any connector (uploads, notes, extension captures, YouTube).
 It lives in its own module — rather than inside a specific agent factory — so
 that retiring or moving any single agent never disturbs the others' access to
 this mapping.
 """
 from __future__ import annotations
 from typing import Any
 # Maps SearchSourceConnectorType enum values to the searchable document/connector types
 # used by pre-search middleware and web_search.
 # Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
 # the web_search tool; all others are considered local/indexed data.
 #
 # The mapping is many-to-one: several connector types can resolve to the same
 # searchable type (e.g. GOOGLE_DRIVE_CONNECTOR and COMPOSIO_GOOGLE_DRIVE_CONNECTOR
 # both yield GOOGLE_DRIVE_FILE), which is why map_connectors_to_searchable_types
 # deduplicates its result. Connector types missing from this table are skipped
 # by that function rather than raising.
 _CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # Live search connectors (handled by web_search tool)
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # Local/indexed connectors (handled by KB pre-search middleware)
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
    # Composio connectors (unified to native document types).
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
 }
 # Document types that don't come from SearchSourceConnector but should always be searchable.
 # Order matters: map_connectors_to_searchable_types emits these entries first, in
 # this order, before any connector-derived types.
 _ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # Browser extension data
    "FILE",  # Uploaded files
    "NOTE",  # User notes
    "YOUTUBE_VIDEO",  # YouTube videos
 ]
 def map_connectors_to_searchable_types(
     connector_types: list[Any],
 ) -> list[str]:
     """
     Translate configured connector types into searchable type strings.
     The result always begins with the always-available document types
     (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO), followed by the searchable type of
     each given connector in first-seen order. Duplicates are dropped, and
     connector types with no entry in the mapping table are ignored.
     Args:
         connector_types: SearchSourceConnectorType enum members, or their
             plain string values.
     Returns:
         Ordered, duplicate-free list of searchable connector/document types.
     """
     # A dict keeps insertion order, so it serves as an ordered set here.
     ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)
     for connector in connector_types:
         # Enum members expose ``.value``; anything else is stringified.
         key = connector.value if hasattr(connector, "value") else str(connector)
         mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(key)
         if mapped:
             # setdefault keeps the position of the first occurrence.
             ordered.setdefault(mapped, None)
     return list(ordered)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/factory.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/factory.py
@ -12,21 +12,28 @@ from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
 from sqlalchemy.ext.asyncio import AsyncSession
-from app.agents.multi_agent_chat.subagents import (
+from app.agents.chat.multi_agent_chat.shared.feature_flags import (
    AgentFeatureFlags,
    get_flags,
 )
 from app.agents.chat.multi_agent_chat.shared.filesystem_selection import (
    FilesystemMode,
    FilesystemSelection,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.resolver import (
    build_backend_resolver,
 )
 from app.agents.chat.multi_agent_chat.subagents import (
    get_subagents_to_exclude,
    main_prompt_registry_subagent_lines,
 )
-from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
+from app.agents.chat.multi_agent_chat.subagents.mcp_tools.index import (
    load_mcp_tools_by_connector,
 )
-from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types
+from app.agents.chat.runtime.llm_config import AgentConfig
-from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
+from app.agents.chat.runtime.prompt_caching import (
-from app.agents.new_chat.filesystem_backends import build_backend_resolver
+    apply_litellm_prompt_caching,
-from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
+)
 from app.agents.new_chat.llm_config import AgentConfig
 from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
 from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
 from app.agents.new_chat.tools.registry import build_tools_async
 from app.db import ChatVisibility
 from app.services.connector_service import ConnectorService
 from app.services.user_tool_allowlist import (
@ -40,7 +47,10 @@ from ..tools import (
    MAIN_AGENT_SURFSENSE_TOOL_NAMES,
    MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
 )
 from ..tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
 from ..tools.registry import build_main_agent_tools
 from .agent_cache import build_agent_with_cache
 from .connector_searchable_types import map_connectors_to_searchable_types
 _perf_log = get_perf_logger()
@ -90,7 +100,7 @@ async def create_multi_agent_chat_deep_agent(
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
-        available_connectors = _map_connectors_to_searchable_types(connector_types)
+        available_connectors = map_connectors_to_searchable_types(connector_types)
        available_document_types = await connector_service.get_available_document_types(
            search_space_id
@ -199,9 +209,6 @@ async def create_multi_agent_chat_deep_agent(
    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")
    if enabled_tools is not None:
        main_agent_enabled_tools = [
            n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
@ -210,12 +217,14 @@ async def create_multi_agent_chat_deep_agent(
        main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
    _t0 = time.perf_counter()
-    tools = await build_tools_async(
+    # Main agent builds only its own small SurfSense toolset via the SRP
    # main-agent registry; connectors/MCP/deliverables are delegated to
    # subagents, so no MCP loading or connector construction happens here.
    tools = build_main_agent_tools(
        dependencies=dependencies,
        enabled_tools=main_agent_enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
        include_mcp_tools=False,
    )
    _flags: AgentFeatureFlags = get_flags()
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/backends.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/backends.py
@ -16,7 +16,7 @@ prompt at agent build time, not edited at runtime.
 Two backends are provided:
 * :class:`BuiltinSkillsBackend` — disk-backed read of bundled skills from
-  ``app/agents/new_chat/skills/builtin/``.
+  ``app/agents/chat/multi_agent_chat/main_agent/skills/builtin/``.
 * :class:`SearchSpaceSkillsBackend` — a thin read-only wrapper over
  :class:`KBPostgresBackend` that filters notes under the privileged folder
  ``/documents/_skills/``.
@ -47,7 +47,9 @@ from deepagents.backends.state import StateBackend
 if TYPE_CHECKING:
    from langchain.tools import ToolRuntime
-    from app.agents.new_chat.middleware.kb_postgres_backend import KBPostgresBackend
+    from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
        KBPostgresBackend,
    )
 logger = logging.getLogger(__name__)
@ -59,9 +61,10 @@ _MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
 def _default_builtin_root() -> Path:
    """Return the absolute path to the bundled builtin skills directory.
-    Located at ``app/agents/new_chat/skills/builtin/`` relative to this module.
+    Located at ``builtin/`` next to this module (this module lives at
    ``app/agents/chat/multi_agent_chat/main_agent/skills/backends.py``).
    """
-    return (Path(__file__).resolve().parent.parent / "skills" / "builtin").resolve()
+    return (Path(__file__).resolve().parent / "builtin").resolve()
 class BuiltinSkillsBackend(BackendProtocol):
@ -121,6 +124,8 @@ class BuiltinSkillsBackend(BackendProtocol):
            else ("/" + str(target.relative_to(self.root)).replace("\\", "/"))
        )
        for child in sorted(target.iterdir()):
            if child.name == "__pycache__" or child.name.startswith("."):
                continue
            child_virtual = (
                target_virtual.rstrip("/") + "/" + child.name
                if target_virtual != "/"
@ -305,7 +310,7 @@ def build_skills_backend_factory(
        # Imported lazily to avoid a hard dependency at module import time:
        # ``KBPostgresBackend`` pulls in DB models, which are unnecessary for
        # the unit-tested builtin path.
-        from app.agents.new_chat.middleware.kb_postgres_backend import (
+        from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
            KBPostgresBackend,
        )
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/__init__.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/providers/__init__.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/email-drafting/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/email-drafting/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/meeting-prep/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/meeting-prep/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/report-writing/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/report-writing/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/slack-summary/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/slack-summary/SKILL.md
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/__init__.py
--- a/Show more
+++ b/Show more