Merge pull request #1377 from AnishSarkar22/feat/e2e-testing-ci

feat: add E2E CI and harden Docker build migrations
2026-05-17 18:35:19 +02:00 · 2026-05-15 04:47:26 -07:00 · 2026-05-15 04:47:26 -07:00 · 4db3cf7fd5
commit 4db3cf7fd5
parent e8aad48ddf 883c72396c
45 changed files with 1733 additions and 495 deletions
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@ -31,7 +31,7 @@ jobs:
      new_tag: ${{ steps.tag_version.outputs.next_version }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.branch }}
@ -108,16 +108,18 @@ jobs:
            name: surfsense-backend
            context: ./surfsense_backend
            file: ./surfsense_backend/Dockerfile
+            target: production
          - image: web
            name: surfsense-web
            context: ./surfsense_web
            file: ./surfsense_web/Dockerfile
+            target: runner
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6

      - name: Set lowercase image name
        id: image
@ -125,19 +127,19 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4

      - name: Free up disk space
        run: |
@ -149,10 +151,11 @@ jobs:

      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
        id: build
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v7
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.file }}
+          target: ${{ matrix.target }}
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.image.outputs.name }}
          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
@ -174,7 +177,7 @@ jobs:
          touch "/tmp/digests/${digest#sha256:}"

      - name: Upload digest
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: digests-${{ matrix.image }}-${{ matrix.suffix }}
          path: /tmp/digests/*
@ -205,22 +208,22 @@ jobs:
        run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT

      - name: Download amd64 digest
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: digests-${{ matrix.image }}-amd64
          path: /tmp/digests

      - name: Download arm64 digest
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: digests-${{ matrix.image }}-arm64
          path: /tmp/digests

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
@ -239,7 +242,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}
          tags: |
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@ -0,0 +1,174 @@
+name: E2E Tests
+
+on:
+  pull_request:
+    branches: [main, dev]
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      - 'surfsense_web/**'
+      - 'surfsense_backend/**'
+      - 'docker/docker-compose.e2e.yml'
+      - '.github/workflows/e2e-tests.yml'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    name: Journey
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false
+    timeout-minutes: 30
+
+    env:
+      # Test user registered through the backend's /auth/register endpoint (see the "Register E2E test user" step) before Playwright runs.
+      PLAYWRIGHT_TEST_EMAIL: e2e-test@surfsense.net
+      PLAYWRIGHT_TEST_PASSWORD: E2eTestPassword123!
+      # Frontend env: Playwright's webServer (surfsense_web/playwright.config.ts)
+      # spawns `pnpm build && pnpm start` in CI; these get baked into the build.
+      NEXT_PUBLIC_FASTAPI_BACKEND_URL: http://localhost:8000
+      NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: LOCAL
+      # Shared secret for the test-only POST /__e2e__/auth/token endpoint.
+      # Must match docker-compose.e2e.yml's backend env (x-backend-env).
+      E2E_MINT_SECRET: e2e-mint-secret-not-for-production
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      # ─── Backend stack ─────────────────────────────────────────────────
+      # Builds the e2e image (multi-stage, deps cached via GHA), brings up
+      # db + redis + backend + celery_worker, blocks until every healthcheck
+      # is green. No `uv` invocation on the runner; no PID files; no curl
+      # polling loops; readiness is gated by Docker healthchecks.
+      - name: Build & start backend stack
+        run: |
+          docker compose -f docker/docker-compose.e2e.yml \
+            up -d --build --wait --wait-timeout 300
+
+      - name: Show backend stack status
+        if: always()
+        run: docker compose -f docker/docker-compose.e2e.yml ps
+
+      - name: Register E2E test user
+        run: |
+          # 200/201 = created, 400 = already exists (idempotent across reruns).
+          STATUS=$(curl -s -o /tmp/register.json -w "%{http_code}" \
+            -X POST http://localhost:8000/auth/register \
+            -H "Content-Type: application/json" \
+            -d "{\"email\":\"${PLAYWRIGHT_TEST_EMAIL}\",\"password\":\"${PLAYWRIGHT_TEST_PASSWORD}\"}")
+          echo "Register status: ${STATUS}"
+          cat /tmp/register.json
+          if [ "${STATUS}" != "200" ] && [ "${STATUS}" != "201" ] && [ "${STATUS}" != "400" ]; then
+            echo "::error::Failed to register test user (status ${STATUS})"
+            exit 1
+          fi
+
+          # Flush auth rate-limit counters so Playwright starts clean.
+          docker compose -f docker/docker-compose.e2e.yml exec -T redis \
+            sh -c "redis-cli --scan --pattern 'surfsense:auth_rate_limit:*' \
+              | xargs -r redis-cli DEL" || true
+
+      # ─── Frontend (host-side) ──────────────────────────────────────────
+      # Playwright's webServer block in playwright.config.ts spawns
+      # `pnpm build && pnpm start` in CI mode and waits for :3000.
+      - uses: actions/setup-node@v6
+        with:
+          node-version: '20'
+
+      - uses: pnpm/action-setup@v6
+
+      - name: Get pnpm store directory
+        id: pnpm-cache
+        shell: bash
+        run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_OUTPUT
+
+      - name: Cache pnpm store
+        uses: actions/cache@v5
+        with:
+          path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
+          key: pnpm-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}
+          restore-keys: pnpm-${{ runner.os }}-
+
+      - name: Install web dependencies
+        working-directory: surfsense_web
+        run: pnpm install --frozen-lockfile
+
+      - name: Cache Playwright browsers
+        id: playwright-cache
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/ms-playwright
+          key: playwright-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}
+
+      - name: Install Playwright browsers
+        if: steps.playwright-cache.outputs.cache-hit != 'true'
+        working-directory: surfsense_web
+        run: pnpm exec playwright install --with-deps chromium
+
+      - name: Install Playwright system deps (cache hit)
+        if: steps.playwright-cache.outputs.cache-hit == 'true'
+        working-directory: surfsense_web
+        run: pnpm exec playwright install-deps chromium
+
+      - name: Cache Next.js build
+        uses: actions/cache@v5
+        with:
+          path: surfsense_web/.next/cache
+          key: nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-${{ github.sha }}
+          restore-keys: |
+            nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-
+            nextjs-${{ runner.os }}-
+
+      # ─── Tests ─────────────────────────────────────────────────────────
+      - name: Run Playwright tests
+        working-directory: surfsense_web
+        run: pnpm test:e2e:prod
+
+      # ─── Failure diagnostics ───────────────────────────────────────────
+      - name: Dump backend stack logs on failure
+        if: ${{ failure() || cancelled() }}
+        run: |
+          mkdir -p ./compose-logs
+          docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps \
+            > ./compose-logs/all-services.log 2>&1 || true
+          for svc in db redis backend celery_worker; do
+            docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps "$svc" \
+              > "./compose-logs/${svc}.log" 2>&1 || true
+          done
+          docker compose -f docker/docker-compose.e2e.yml ps \
+            > ./compose-logs/ps.txt 2>&1 || true
+
+      # ─── Artifacts ─────────────────────────────────────────────────────
+      - name: Upload Playwright HTML report
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: playwright-report
+          path: surfsense_web/playwright-report/
+          retention-days: 14
+
+      - name: Upload Playwright traces
+        if: failure()
+        uses: actions/upload-artifact@v7
+        with:
+          name: playwright-traces
+          path: surfsense_web/test-results/
+          retention-days: 14
+
+      - name: Upload backend stack logs
+        if: ${{ failure() || cancelled() }}
+        uses: actions/upload-artifact@v7
+        with:
+          name: backend-stack-logs
+          path: ./compose-logs/
+          retention-days: 7
+
+      # ─── Teardown ──────────────────────────────────────────────────────
+      - name: Tear down backend stack
+        if: always()
+        run: docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
--- a/docker/.env.example
+++ b/docker/.env.example
@ -4,7 +4,7 @@
 # Database, Redis, and internal service wiring are handled automatically.
 # ==============================================================================

-# SurfSense version (use "latest", a clean version like "0.0.14", or a specific build like "0.0.14.1")
+# SurfSense version (use "latest" or a specific version like "0.0.14")
 SURFSENSE_VERSION=latest

 # ------------------------------------------------------------------------------
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@ -10,6 +10,11 @@

 name: surfsense-dev

+x-backend-build: &backend-build
+  context: ../surfsense_backend
+  args:
+    EMBEDDING_MODEL: ${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
+
 services:
  db:
    image: pgvector/pgvector:pg17
@ -69,7 +74,7 @@ services:
      retries: 5

  backend:
-    build: ../surfsense_backend
+    build: *backend-build
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
@ -114,7 +119,7 @@ services:
      start_period: 200s

  celery_worker:
-    build: ../surfsense_backend
+    build: *backend-build
    volumes:
      - ../surfsense_backend/app:/app/app
      - shared_temp:/shared_tmp
@ -140,7 +145,7 @@ services:
        condition: service_healthy

  celery_beat:
-    build: ../surfsense_backend
+    build: *backend-build
    env_file:
      - ../surfsense_backend/.env
    environment:
@ -159,7 +164,7 @@ services:
        condition: service_started

  # flower:
-  #   build: ../surfsense_backend
+  #   build: *backend-build
  #   ports:
  #     - "${FLOWER_PORT:-5555}:5555"
  #   env_file:
--- a/docker/docker-compose.e2e.yml
+++ b/docker/docker-compose.e2e.yml
@ -0,0 +1,181 @@
+# =============================================================================
+# SurfSense — E2E Docker Compose stack
+# =============================================================================
+# Hermetic backend stack for Playwright E2E tests:
+#   - db / redis on an internal-only network (no internet egress)
+#   - backend (FastAPI) joins the internal network AND a separate ingress
+#     bridge so the host runner can reach :8000
+#   - celery_worker on the internal network only — zero egress surface
+#
+# The backend image is built from surfsense_backend/Dockerfile target=e2e,
+# which adds tests/ via the `tests-source` additional context (tests/ is
+# excluded from the main context by .dockerignore so production never ships
+# test fakes). See surfsense_backend/Dockerfile for stage layout.
+#
+# Usage from repo root:
+#   docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
+#   curl -X POST http://localhost:8000/auth/register ...
+#   ( run Playwright on host, pointing at localhost:8000 + localhost:3000 )
+#   docker compose -f docker/docker-compose.e2e.yml down -v
+# =============================================================================
+
+name: surfsense-e2e
+
+x-backend-env: &backend-env
+  DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/surfsense_e2e
+  CELERY_BROKER_URL: redis://redis:6379/0
+  CELERY_RESULT_BACKEND: redis://redis:6379/0
+  REDIS_APP_URL: redis://redis:6379/0
+  CELERY_TASK_DEFAULT_QUEUE: surfsense
+  SECRET_KEY: ci-test-secret-key-not-for-production
+  AUTH_TYPE: LOCAL
+  REGISTRATION_ENABLED: "TRUE"
+  ETL_SERVICE: DOCLING
+  EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
+  NEXT_FRONTEND_URL: http://host.docker.internal:3000
+  # Sentinel keys — the fakes never read them; they turn any leaked real call into a 401.
+  COMPOSIO_API_KEY: e2e-deny-real-call-sentinel
+  COMPOSIO_ENABLED: "TRUE"
+  OPENAI_API_KEY: e2e-deny-real-call-sentinel
+  ANTHROPIC_API_KEY: e2e-deny-real-call-sentinel
+  LITELLM_API_KEY: e2e-deny-real-call-sentinel
+  MICROSOFT_CLIENT_ID: fake-microsoft-client-id
+  MICROSOFT_CLIENT_SECRET: fake-microsoft-client-secret
+  ONEDRIVE_REDIRECT_URI: http://localhost:8000/api/v1/auth/onedrive/connector/callback
+  DROPBOX_APP_KEY: fake-dropbox-app-key
+  DROPBOX_APP_SECRET: fake-dropbox-app-secret
+  DROPBOX_REDIRECT_URI: http://localhost:8000/api/v1/auth/dropbox/connector/callback
+  # Defense-in-depth: even though L3 egress is denied for the worker via
+  # `internal: true`, the backend still has a route via `ingress`. Setting
+  # HTTPS_PROXY to an unreachable port turns any leaked Python outbound HTTP
+  # call into a fast Connection refused. UNLIKE the old runner-shell setup,
+  # this proxy is set on the container env and `uv` is never invoked here,
+  # so there is no interaction with uv's implicit-sync behaviour.
+  HTTPS_PROXY: http://127.0.0.1:1
+  HTTP_PROXY: http://127.0.0.1:1
+  NO_PROXY: localhost,127.0.0.1,0.0.0.0,db,redis,host.docker.internal
+  HF_HUB_OFFLINE: "1"
+  TRANSFORMERS_OFFLINE: "1"
+  # Test-only token-mint endpoint secret (see tests/e2e/run_backend.py); must match E2E_MINT_SECRET in .github/workflows/e2e-tests.yml.
+  E2E_MINT_SECRET: e2e-mint-secret-not-for-production
+
+services:
+  db:
+    image: pgvector/pgvector:pg17
+    command: >
+      postgres
+        -c wal_level=logical
+        -c max_wal_senders=10
+        -c max_replication_slots=10
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: surfsense_e2e
+    # Ephemeral storage — every CI run gets a clean DB, no volume cleanup needed.
+    tmpfs:
+      - /var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres -d surfsense_e2e"]
+      interval: 2s
+      timeout: 3s
+      retries: 30
+    networks: [internal]
+
+  redis:
+    image: redis:8-alpine
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 2s
+      timeout: 3s
+      retries: 30
+    networks: [internal]
+
+  backend:
+    build:
+      context: ../surfsense_backend
+      dockerfile: Dockerfile
+      target: e2e
+      additional_contexts:
+        # tests/ is excluded from the main context by .dockerignore;
+        # the e2e stage's `COPY --from=tests-source` pulls it in here.
+        tests-source: ../surfsense_backend/tests
+      args:
+        EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
+      cache_from:
+        - type=gha,scope=surfsense-e2e-backend
+      cache_to:
+        - type=gha,mode=max,scope=surfsense-e2e-backend
+    image: surfsense-e2e-backend:local
+    environment:
+      <<: *backend-env
+      SERVICE_ROLE: api
+    volumes:
+      - shared_temp:/shared_tmp
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    ports:
+      - "8000:8000"
+    depends_on:
+      db: { condition: service_healthy }
+      redis: { condition: service_healthy }
+    healthcheck:
+      # Use Python (already in the image) instead of curl/wget to avoid
+      # depending on either tool being installed in the runtime layers.
+      test:
+        - CMD
+        - python
+        - -c
+        - |
+          import sys, urllib.request
+          try:
+              r = urllib.request.urlopen("http://localhost:8000/openapi.json", timeout=2)
+              sys.exit(0 if r.status == 200 else 1)
+          except Exception:
+              sys.exit(1)
+      interval: 3s
+      timeout: 5s
+      retries: 60
+      start_period: 30s
+    networks:
+      - internal      # to reach db/redis
+      - ingress       # so host can reach :8000
+
+  celery_worker:
+    image: surfsense-e2e-backend:local
+    pull_policy: never
+    # No build: section — reuses the `surfsense-e2e-backend:local` image that
+    # the `backend` service builds and tags, so the image is built only once.
+    # `pull_policy: never` keeps Compose from trying to pull this local-only tag.
+    environment:
+      <<: *backend-env
+      SERVICE_ROLE: worker
+    volumes:
+      - shared_temp:/shared_tmp
+    depends_on:
+      backend: { condition: service_healthy }
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - "celery -A app.celery_app inspect ping --timeout 2 | grep -q pong"
+      interval: 5s
+      timeout: 5s
+      retries: 12
+      start_period: 20s
+    networks: [internal]
+
+networks:
+  # Internal network: containers attached only to this network have NO route
+  # to the host or the internet. This is the L3 deny-egress mechanism that
+  # replaces the fragile HTTPS_PROXY-on-the-runner approach.
+  internal:
+    driver: bridge
+    internal: true
+
+  # Regular bridge network. Only the `backend` service joins it, solely so
+  # the host can reach :8000 via the published port. celery_worker / db /
+  # redis stay off this network entirely.
+  ingress:
+    driver: bridge
+
+volumes:
+  shared_temp:
--- a/package.json
+++ b/package.json
@ -1,5 +1,5 @@
 {
  "name": "surfsense",
  "private": true,
-  "packageManager": "pnpm@10.24.0"
+  "packageManager": "pnpm@10.26.0"
 }
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -13,5 +13,5 @@ celerybeat-schedule*
 celerybeat-schedule.*
 celerybeat-schedule.dir
 celerybeat-schedule.bak
-global_llm_config.yaml
+/app/config/global_llm_config.yaml
 app/templates/_generated/
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -1,8 +1,16 @@
-FROM python:3.12-slim
+# =============================================================================
+# SurfSense Backend — Multi-stage Dockerfile
+# =============================================================================
+# Graph: base → deps → models → {e2e, production}
+#   e2e        — tests/ via additional_contexts (docker-compose.e2e.yml)
+#   production — published ghcr.io image (docker-build.yml pins target)
+# =============================================================================
+
+# ─── Stage 1: base (system deps, Pandoc, certificates) ──────────────────────
+FROM python:3.12-slim AS base

 WORKDIR /app

-# Install system dependencies including SSL tools, CUDA dependencies, and Tesseract OCR
 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    python3-dev \
@ -11,6 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    unzip \
    gnupg2 \
+    ffmpeg \
    espeak-ng \
    libsndfile1 \
    libgl1 \
@ -22,21 +31,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    && rm -rf /var/lib/apt/lists/*

-# Install Pandoc 3.x from GitHub as a fallback for Linux where pypandoc_binary
-# may not bundle pandoc (apt ships 2.17 which has broken table rendering).
-# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks this up.
+RUN which ffmpeg && ffmpeg -version
+
+# Pandoc 3.x from GitHub Releases — apt ships 2.17 which has broken table rendering.
+# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks up this binary.
 RUN ARCH=$(dpkg --print-architecture) && \
    wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
    dpkg -i /tmp/pandoc.deb && \
    rm /tmp/pandoc.deb

-# Update certificates and install SSL tools
 RUN update-ca-certificates
 RUN pip install --upgrade certifi pip-system-certs

-# Copy requirements
-COPY pyproject.toml .
-COPY uv.lock .
+ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+ENV SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD=FALSE
+
+
+# ─── Stage 2: deps (Python deps frozen from uv.lock) ────────────────────────
+FROM base AS deps
+
+COPY pyproject.toml uv.lock ./

 # Install all Python dependencies from uv.lock for deterministic builds.
 #
@ -49,9 +64,7 @@ COPY uv.lock .
 # Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
 # PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
 # nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
-# captured in uv.lock). Installing from cu121 first only wasted ~2GB of
-# downloads that the lock-based install immediately replaced. If a specific
-# CUDA version is needed (driver compatibility, etc.), wire it through
+# captured in uv.lock). If a specific CUDA version is needed, wire it through
 # [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
 RUN pip install --no-cache-dir uv && \
    uv export --frozen --no-dev --no-hashes --no-emit-project \
@ -59,49 +72,42 @@ RUN pip install --no-cache-dir uv && \
    uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt

-# Set SSL environment variables dynamically
-RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
-    echo "Setting SSL_CERT_FILE to $CERTIFI_PATH" && \
-    echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /root/.bashrc && \
-    echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /root/.bashrc
-ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
-ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+
+# ─── Stage 3: models (pre-baked offline assets) ─────────────────────────────
+FROM deps AS models

 # Pre-download EasyOCR models to avoid runtime SSL issues
-RUN mkdir -p /root/.EasyOCR/model
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true
-RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
+RUN mkdir -p /root/.EasyOCR/model && \
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip      -O /root/.EasyOCR/model/english_g2.zip      || true && \
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true && \
+    cd /root/.EasyOCR/model && \
+    (unzip -o english_g2.zip || true) && \
+    (unzip -o craft_mlt_25k.zip || true)

 # Pre-download Docling models
-RUN python -c "try:\n    from docling.document_converter import DocumentConverter\n    conv = DocumentConverter()\nexcept:\n    pass" || true
+RUN printf '%s\n' \
+    'try:' \
+    '    from docling.document_converter import DocumentConverter' \
+    '    DocumentConverter()' \
+    'except Exception:' \
+    '    pass' \
+    | python || true

-# Install Playwright browsers for web scraping (the playwright package itself
-# is already installed via uv.lock above)
+ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
+
+# Install Playwright browsers (the playwright python package itself is in deps)
 RUN playwright install chromium --with-deps

-# Copy source code
-COPY . .
-
-# Install the project itself in editable mode. Dependencies were already
-# installed deterministically from uv.lock above, so --no-deps prevents any
-# re-resolution that could pull newer versions.
-RUN uv pip install --system --no-cache-dir --no-deps -e .
-
-# Copy and set permissions for entrypoint script
-# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
-COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
-RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
-
 # Shared temp directory for file uploads between API and Worker containers.
 # Python's tempfile module uses TMPDIR, so uploaded files land here.
 # Mount the SAME volume at /shared_tmp on both API and Worker in Coolify.
 RUN mkdir -p /shared_tmp
-ENV TMPDIR=/shared_tmp

-# Prevent uvloop compatibility issues
 ENV PYTHONPATH=/app
 ENV UVICORN_LOOP=asyncio
+ENV TMPDIR=/shared_tmp
+ENV PYTHONUNBUFFERED=1

 # Tune glibc malloc to return freed memory to the OS more aggressively.
 # Without these, Python's gc.collect() frees objects but the underlying
@ -110,6 +116,56 @@ ENV MALLOC_MMAP_THRESHOLD_=65536
 ENV MALLOC_TRIM_THRESHOLD_=131072
 ENV MALLOC_MMAP_MAX_=65536

+
+# ─── Stage 4: e2e (production source + tests/ + e2e entrypoint) ─────────────
+# Built via `docker buildx build --target e2e`. The default build target is
+# `production` (the last stage), so this stage is opt-in for CI only.
+#
+# `tests/` is excluded from the main build context by .dockerignore (so prod
+# can never accidentally ship test fakes). The e2e stage receives tests/
+# through an "additional context" passed by docker-compose.e2e.yml — see
+# https://docs.docker.com/reference/compose-file/build/#additional_contexts
+FROM models AS e2e
+
+# Same source copy as production. .dockerignore filters out tests/.
+COPY . .
+
+# Bring tests/ in via the named additional build context `tests-source`.
+# docker-compose.e2e.yml supplies it through `additional_contexts`; with plain
+# buildx the equivalent flag is `--build-context tests-source=./tests`.
+COPY --from=tests-source . ./tests/
+
+# Install the project itself in editable mode. Dependencies were already
+# installed deterministically from uv.lock above, so --no-deps prevents any
+# re-resolution that could pull newer versions.
+RUN uv pip install --system --no-cache-dir --no-deps -e .
+
+COPY scripts/docker/entrypoint.e2e.sh /app/scripts/docker/entrypoint.e2e.sh
+RUN dos2unix /app/scripts/docker/entrypoint.e2e.sh && chmod +x /app/scripts/docker/entrypoint.e2e.sh
+
+# SERVICE_ROLE is overridden per service in docker-compose.e2e.yml (api / worker).
+ENV SERVICE_ROLE=api
+
+EXPOSE 8000-8001
+CMD ["/app/scripts/docker/entrypoint.e2e.sh"]
+
+
+# ─── Stage 5: production (published ghcr.io image) ──────────────────────────
+# CI pins `target: production`; also the default for `docker build` / dev compose.
+FROM models AS production
+
+# Copy source code (tests/ excluded by .dockerignore — production never ships tests).
+COPY . .
+
+# Install the project itself in editable mode. Dependencies were already
+# installed deterministically from uv.lock above, so --no-deps prevents any
+# re-resolution that could pull newer versions.
+RUN uv pip install --system --no-cache-dir --no-deps -e .
+
+# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
+COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
+RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
+
 # SERVICE_ROLE controls which process this container runs:
 #   api     – FastAPI backend only (runs migrations on startup)
 #   worker  – Celery worker only
@ -127,6 +183,5 @@ ENV CELERY_MAX_TASKS_PER_CHILD=50
 #   ""                       – both queues (default, for single-worker setups)
 ENV CELERY_QUEUES=""

-# Run
 EXPOSE 8000-8001
-CMD ["/app/scripts/docker/entrypoint.sh"]
+CMD ["/app/scripts/docker/entrypoint.sh"]
--- a/surfsense_backend/alembic/env.py
+++ b/surfsense_backend/alembic/env.py
@ -67,7 +67,11 @@ def run_migrations_offline() -> None:


 def do_run_migrations(connection: Connection) -> None:
-    context.configure(connection=connection, target_metadata=target_metadata)
+    context.configure(
+        connection=connection,
+        target_metadata=target_metadata,
+        transaction_per_migration=True,
+    )

    with context.begin_transaction():
        context.run_migrations()
--- a/surfsense_backend/alembic/versions/130_add_agent_action_log.py
+++ b/surfsense_backend/alembic/versions/130_add_agent_action_log.py
@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None


 def upgrade() -> None:
+    bind = op.get_bind()
+    if sa.inspect(bind).has_table("agent_action_log"):
+        return
+
    op.create_table(
        "agent_action_log",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/131_add_document_revisions.py
+++ b/surfsense_backend/alembic/versions/131_add_document_revisions.py
@ -29,6 +29,21 @@ depends_on: str | Sequence[str] | None = None


 def upgrade() -> None:
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    if inspector.has_table("document_revisions") and inspector.has_table(
+        "folder_revisions"
+    ):
+        return
+
+    if not inspector.has_table("document_revisions"):
+        _create_document_revisions()
+    if not inspector.has_table("folder_revisions"):
+        _create_folder_revisions()
+
+
+def _create_document_revisions() -> None:
    op.create_table(
        "document_revisions",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
@ -74,6 +89,8 @@ def upgrade() -> None:
        ),
    )

+
+def _create_folder_revisions() -> None:
    op.create_table(
        "folder_revisions",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
+++ b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None


 def upgrade() -> None:
+    bind = op.get_bind()
+    if sa.inspect(bind).has_table("agent_permission_rules"):
+        return
+
    op.create_table(
        "agent_permission_rules",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py
+++ b/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py
@ -50,29 +50,39 @@ depends_on: str | Sequence[str] | None = None


 def upgrade() -> None:
-    op.add_column(
-        "agent_action_log",
-        sa.Column("tool_call_id", sa.String(length=64), nullable=True),
-    )
-    op.add_column(
-        "agent_action_log",
-        sa.Column("chat_turn_id", sa.String(length=64), nullable=True),
-    )
+    """Add ``tool_call_id`` and ``chat_turn_id`` to ``agent_action_log``.
+
+    Both columns are nullable ``String(64)`` and each gets its own index.
+    Every step first checks the live schema and is skipped when the column
+    or index is already present.
+    """
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    # Column and index names as they exist before this migration changes
+    # anything; all checks below are made against this snapshot.
+    columns = {c["name"] for c in inspector.get_columns("agent_action_log")}
+    indexes = {i["name"] for i in inspector.get_indexes("agent_action_log")}

-    op.create_index(
-        "ix_agent_action_log_tool_call_id",
-        "agent_action_log",
-        ["tool_call_id"],
-    )
-    op.create_index(
-        "ix_agent_action_log_chat_turn_id",
-        "agent_action_log",
-        ["chat_turn_id"],
-    )
+    if "tool_call_id" not in columns:
+        op.add_column(
+            "agent_action_log",
+            sa.Column("tool_call_id", sa.String(length=64), nullable=True),
+        )
+    if "chat_turn_id" not in columns:
+        op.add_column(
+            "agent_action_log",
+            sa.Column("chat_turn_id", sa.String(length=64), nullable=True),
+        )

-    op.execute(
-        "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL"
-    )
+    if "ix_agent_action_log_tool_call_id" not in indexes:
+        op.create_index(
+            "ix_agent_action_log_tool_call_id",
+            "agent_action_log",
+            ["tool_call_id"],
+        )
+    if "ix_agent_action_log_chat_turn_id" not in indexes:
+        op.create_index(
+            "ix_agent_action_log_chat_turn_id",
+            "agent_action_log",
+            ["chat_turn_id"],
+        )
+
+    # Backfill: copy turn_id into tool_call_id for rows that have no
+    # tool_call_id yet. Skipped when the table has no turn_id column to
+    # copy from.
+    if "turn_id" in columns:
+        op.execute(
+            "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL"
+        )


 def downgrade() -> None:
--- a/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py
+++ b/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py
@ -36,15 +36,22 @@ depends_on: str | Sequence[str] | None = None


 def upgrade() -> None:
-    op.add_column(
-        "new_chat_messages",
-        sa.Column("turn_id", sa.String(length=64), nullable=True),
-    )
-    op.create_index(
-        "ix_new_chat_messages_turn_id",
-        "new_chat_messages",
-        ["turn_id"],
-    )
+    """Add a nullable ``turn_id`` column and its index to ``new_chat_messages``.
+
+    The column and the index are each created only when the live schema
+    does not already contain them.
+    """
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    # Names present before this migration changes anything.
+    columns = {c["name"] for c in inspector.get_columns("new_chat_messages")}
+    indexes = {i["name"] for i in inspector.get_indexes("new_chat_messages")}
+
+    if "turn_id" not in columns:
+        op.add_column(
+            "new_chat_messages",
+            sa.Column("turn_id", sa.String(length=64), nullable=True),
+        )
+    if "ix_new_chat_messages_turn_id" not in indexes:
+        op.create_index(
+            "ix_new_chat_messages_turn_id",
+            "new_chat_messages",
+            ["turn_id"],
+        )


 def downgrade() -> None:
--- a/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py
+++ b/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py
@ -27,6 +27,8 @@ from __future__ import annotations

 from collections.abc import Sequence

+import sqlalchemy as sa
+
 from alembic import op

 revision: str = "137"
@ -39,6 +41,11 @@ _INDEX_NAME = "ux_agent_action_log_reverse_of"


 def upgrade() -> None:
+    bind = op.get_bind()
+    indexes = {i["name"] for i in sa.inspect(bind).get_indexes("agent_action_log")}
+    if _INDEX_NAME in indexes:
+        return
+
    # Defensively de-dup any pre-existing double-revert rows before
    # adding the unique index. Keeps the OLDEST row (smallest id) and
    # NULLs out the duplicates' ``reverse_of`` so they survive as audit
--- a/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
+++ b/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
@ -53,6 +53,11 @@ TABLE_NAME = "new_chat_messages"


 def upgrade() -> None:
+    bind = op.get_bind()
+    indexes = {i["name"] for i in sa.inspect(bind).get_indexes(TABLE_NAME)}
+    if INDEX_NAME in indexes:
+        return
+
    op.create_index(
        INDEX_NAME,
        TABLE_NAME,
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@ -473,10 +473,16 @@ def initialize_vision_llm_router():
 class Config:
    # Check if ffmpeg is installed
    if not is_ffmpeg_installed():
-        import static_ffmpeg
+        allow_static_ffmpeg = (
+            os.getenv("SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD", "TRUE").upper()
+            == "TRUE"
+        )
+        if allow_static_ffmpeg:
+            import static_ffmpeg
+
+            # ffmpeg installed on first call to add_paths(), threadsafe.
+            static_ffmpeg.add_paths()

-        # ffmpeg installed on first call to add_paths(), threadsafe.
-        static_ffmpeg.add_paths()
        # check if ffmpeg is installed again
        if not is_ffmpeg_installed():
            raise ValueError(
--- a/surfsense_backend/scripts/docker/entrypoint.e2e.sh
+++ b/surfsense_backend/scripts/docker/entrypoint.e2e.sh
@ -0,0 +1,53 @@
+#!/bin/bash
+# =============================================================================
+# E2E entrypoint for the multi-stage Dockerfile's `e2e` target.
+#
+# Dispatches on SERVICE_ROLE to the test-only entrypoints under tests/e2e/.
+# Those scripts apply sys.modules hijacks and LLM/embedding patches BEFORE
+# importing production app code (see tests/e2e/run_backend.py for rationale).
+#
+# Production never sees this file: tests/ is excluded from the production
+# stage, and the production stage uses scripts/docker/entrypoint.sh.
+# =============================================================================
+# Abort on any failing command, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# Role to start; falls back to "api" when SERVICE_ROLE is unset or empty.
+SERVICE_ROLE="${SERVICE_ROLE:-api}"
+echo "[e2e-entrypoint] starting role=${SERVICE_ROLE}"
+
+wait_for_db() {
+    # Block until the database accepts a connection and answers a trivial
+    # query. We don't loop forever — Compose depends_on/healthchecks already
+    # gate on db readiness, this is just belt-and-suspenders so a slow first
+    # connection doesn't race migrations.
+    #
+    # The probe has to open a real connection: disposing the engine on its
+    # own never touches the network, so it would succeed even while the
+    # database is still down.
+    #
+    # Returns 0 as soon as one probe succeeds, 1 after 60 failed attempts
+    # (one second apart).
+    local attempt
+    for attempt in {1..60}; do
+        echo "[e2e-entrypoint] db check attempt ${attempt}/60"
+        if python -c "
+import asyncio
+from sqlalchemy import text
+from app.db import engine
+
+async def probe():
+    try:
+        async with engine.connect() as conn:
+            await conn.execute(text('SELECT 1'))
+    finally:
+        await engine.dispose()
+
+asyncio.run(probe())
+"; then
+            echo "[e2e-entrypoint] db reachable after ${attempt} attempts"
+            return 0
+        fi
+        sleep 1
+    done
+    echo "[e2e-entrypoint] ERROR: db not reachable after 60s" >&2
+    return 1
+}
+
+# Dispatch on the requested role. Both roles wait for the database first;
+# only the api role applies migrations.
+case "${SERVICE_ROLE}" in
+    api)
+        wait_for_db
+        echo "[e2e-entrypoint] running alembic upgrade head"
+        alembic upgrade head
+        # `exec` so SIGTERM from `docker stop` reaches Python directly,
+        # without a shell wrapper interposing.
+        exec python tests/e2e/run_backend.py
+        ;;
+    worker)
+        # Worker doesn't run migrations — the api role does that exactly once.
+        # We still wait for db so Celery's broker connection check doesn't
+        # race against an unready Postgres on cold start.
+        wait_for_db
+        exec python tests/e2e/run_celery.py
+        ;;
+    *)
+        # Any other value is a configuration error: report it and fail.
+        echo "[e2e-entrypoint] ERROR: unknown SERVICE_ROLE='${SERVICE_ROLE}' (expected: api | worker)" >&2
+        exit 1
+        ;;
+esac
--- a/surfsense_backend/tests/e2e/README.md
+++ b/surfsense_backend/tests/e2e/README.md
@ -1,48 +1,48 @@
-# Backend E2E Test Harness
+# Backend E2E Harness

-Strict fakes + alternative entrypoints used **only** by Playwright E2E.
-Excluded from the production Docker image via `.dockerignore`.
+This directory contains the test-only backend entrypoints and fakes used by
+Playwright. They are not part of the production image: `.dockerignore` excludes
+`tests/`, and the E2E Docker stage copies this directory through a separate
+build context.

 ## Files

-| Path                             | Role                                                                            |
-| -------------------------------- | ------------------------------------------------------------------------------- |
-| `run_backend.py`                 | FastAPI entrypoint that hijacks `sys.modules` before importing `app.app:app`    |
-| `run_celery.py`                  | Celery worker entrypoint with the same hijack + patch logic                     |
-| `middleware/scenario.py`         | `X-E2E-Scenario` header → ContextVar (read by fakes)                            |
-| `fakes/composio_module.py`       | Strict drop-in for the `composio` package; raises on unknown surface            |
-| `fakes/llm.py`                   | `fake_get_user_long_context_llm` returning a `FakeListChatModel`                |
-| `fakes/embeddings.py`            | Deterministic 0.1-vector `embed_text` / `embed_texts`                           |
-| `fakes/fixtures/drive_files.json`| Canned Drive listings + file contents (incl. canary tokens)                     |
+| Path | Purpose |
+| --- | --- |
+| `run_backend.py` | Starts FastAPI after installing the test fakes into `sys.modules`. |
+| `run_celery.py` | Starts the Celery worker with the same fake setup. |
+| `middleware/scenario.py` | Reads `X-E2E-Scenario` into a request-scoped context var. |
+| `fakes/composio_module.py` | Fake `composio` package used by connector flows. |
+| `fakes/llm.py` | Fake chat model factory. |
+| `fakes/embeddings.py` | Deterministic embedding helpers. |
+| `fakes/fixtures/drive_files.json` | Drive fixture data and canary file contents. |

-## Why a sys.modules hijack?
+## Why the import hook exists

-Production code does `from composio import Composio` at module load
-time. By the time the FastAPI app object exists, that binding has
-already been resolved. The hijack runs **before** any `app.*` import,
-so the binding resolves to our strict fake. No production source
-changes; fakes are physically excluded from production images.
+Some production modules import SDK clients at module load time, for example
+`from composio import Composio`. By the time `app.app` has been imported, those
+bindings are already fixed.

-Belt + suspenders + no internet: the strict `__getattr__` in every
-fake raises `NotImplementedError` if a future production code path
-introduces a new SDK call. CI also sets `HTTPS_PROXY=http://127.0.0.1:1`
-plus sentinel API keys so any leaked outbound HTTP fails immediately.
+The E2E entrypoints install fake modules in `sys.modules` before importing any
+`app.*` module. That lets the normal production code run while SDK calls resolve
+to local fakes.

-## Adding a new fake
+The fakes should fail loudly. If production starts using a new SDK method that
+the fake does not implement, add that method to the fake instead of letting the
+test call the real service.

-1. Create `fakes/<sdk>_module.py` modelled on `composio_module.py`.
-2. In `run_backend.py` and `run_celery.py`, register
-   `sys.modules["<sdk>"] = _fake_<sdk>` before the `from app.app import app`
-   line.
-3. If the new fake needs scenario branching, read from
+## Adding a fake
+
+1. Add `fakes/<sdk>_module.py`.
+2. Register it in both `run_backend.py` and `run_celery.py` before importing
+   `app.app` or `app.celery_app`.
+3. If the fake needs per-test behavior, read the current scenario from
   `tests.e2e.middleware.scenario.current_scenario()`.

-## Reused by backend integration tests
+## Shared with backend integration tests

-The strict fakes are not only for Playwright. Backend route integration
-tests can import the same fake before importing `app.app`, so Composio
-route tests exercise production route code without touching the real
-SDK:
+Backend integration tests can use the same fakes when they need production route
+code without the real SDK:

 ```python
 from tests.e2e.fakes import composio_module as _fake_composio
@ -50,20 +50,93 @@ sys.modules["composio"] = _fake_composio
 from app.app import app
 ```

-See `surfsense_backend/tests/integration/composio/conftest.py` for the
-current pattern.
+See `surfsense_backend/tests/integration/composio/conftest.py` for the current
+pattern.

 ## Running locally

+The recommended local flow runs only Postgres and Redis in Docker, and the
+backend + Celery worker on the host. No `.env` file is required: both
+entrypoints `setdefault` every variable they need (DB URL, Redis URL,
+sentinel API keys, etc.) to values that match `docker-compose.deps-only.yml`.
+
+### One-time setup
+
+From `surfsense_web/`:
+
 ```bash
-cd surfsense_backend
+pnpm install
+pnpm exec playwright install --with-deps chromium
+```
+
+### Each run
+
+**1. Bring up Postgres + Redis** from the repo root. The other deps-only
+services (SearXNG, Zero, pgAdmin) are not needed for E2E:
+
+```bash
+docker compose -f docker/docker-compose.deps-only.yml up -d db redis
+```
+
+**2. Start the backend** in `surfsense_backend/`, terminal A:
+
+```bash
+uv sync
+uv run alembic upgrade head
 uv run python tests/e2e/run_backend.py
-# in a second shell:
+```
+
+**3. Start the Celery worker** in `surfsense_backend/`, terminal B:
+
+```bash
 uv run python tests/e2e/run_celery.py
 ```

-Then in `surfsense_web`:
+**4. Register the Playwright user**:

 ```bash
-pnpm test:e2e
+curl -X POST http://localhost:8000/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}'
 ```
+
+**5. Run Playwright** from `surfsense_web/`, terminal C:
+
+```bash
+pnpm test:e2e             # dev server (fast iteration)
+pnpm test:e2e:headed      # show the browser
+pnpm test:e2e:ui          # Playwright UI mode
+pnpm test:e2e:prod        # build + start (matches CI exactly)
+```
+
+`playwright.config.ts` and the run scripts share defaults, so this works on a
+fresh checkout. Set `PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`,
+`NEXT_PUBLIC_FASTAPI_BACKEND_URL`, or any backend env (e.g. `DATABASE_URL`)
+only when pointing tests at a different stack.
+
+### Cleanup
+
+```bash
+docker compose -f docker/docker-compose.deps-only.yml down
+```
+
+Add `-v` to also wipe the Postgres volume.
+
+### Hermetic alternative (matches CI)
+
+To reproduce the CI environment exactly — backend and Celery in containers,
+network egress denied at L3 — replace steps 1–3 with:
+
+```bash
+docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
+```
+
+Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear
+down with:
+
+```bash
+docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
+```
+
+This builds the ~9 GB `surfsense-e2e-backend:local` image, so the deps-only
+flow above is faster for day-to-day development.
--- a/surfsense_backend/tests/e2e/auth_mint.py
+++ b/surfsense_backend/tests/e2e/auth_mint.py
@ -0,0 +1,66 @@
+"""Test-only token mint endpoint for the E2E backend entrypoint.
+
+Mounted by ``tests/e2e/run_backend.py`` so Playwright can authenticate
+the seeded e2e user without hitting ``/auth/jwt/login`` (rate-limited
+to 5/min/IP in production). NEVER ships to production: this whole
+``tests/`` tree is excluded from the production Docker image by
+``surfsense_backend/.dockerignore``.
+
+Authn: shared secret in ``X-E2E-Mint-Secret``. Same value is set on the
+backend container env (``docker/docker-compose.e2e.yml``) and exported
+to the Playwright runner (``.github/workflows/e2e-tests.yml``).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+
+from fastapi import APIRouter, FastAPI, Header, HTTPException
+from pydantic import BaseModel
+from sqlalchemy import select
+
+from app.db import User, async_session_maker
+from app.users import get_jwt_strategy
+
+_logger = logging.getLogger("surfsense.e2e.auth_mint")
+
+
+# Request body for the mint endpoint. ``email`` selects the user to mint a
+# token for; when omitted it defaults to e2e-test@surfsense.net.
+# (Documented with comments rather than a docstring so the generated schema
+# description is left unchanged.)
+class MintRequest(BaseModel):
+    email: str = "e2e-test@surfsense.net"
+
+
+# Response body for the mint endpoint: the minted token and its type, which
+# defaults to "bearer".
+class MintResponse(BaseModel):
+    access_token: str
+    token_type: str = "bearer"
+
+
+def _expected_secret() -> str:
+    """Return the secret callers must present in ``X-E2E-Mint-Secret``.
+
+    The value is read from ``E2E_MINT_SECRET`` on every call; a fixed
+    local-only fallback is used when that variable is not set.
+    """
+    fallback = "local-e2e-mint-secret-not-for-production"
+    return os.getenv("E2E_MINT_SECRET", fallback)
+
+
+router = APIRouter(prefix="/__e2e__", tags=["__e2e__"])
+
+
+@router.post("/auth/token", response_model=MintResponse)
+async def mint_test_token(
+    body: MintRequest,
+    x_e2e_mint_secret: str = Header(..., alias="X-E2E-Mint-Secret"),
+) -> MintResponse:
+    # Mint a JWT for an existing user, looked up by ``body.email``.
+    #
+    # Responses:
+    #   403 - the X-E2E-Mint-Secret header does not match the expected secret
+    #   404 - no user with that email exists
+    #   200 - MintResponse carrying the freshly written token
+    #
+    # (Comments rather than a docstring: a route docstring would be
+    # published as the operation description in the OpenAPI schema.)
+    import hmac
+
+    # Constant-time comparison so response timing does not reveal how much
+    # of the secret matched. Compared as bytes because compare_digest()
+    # rejects str arguments containing non-ASCII characters, which a header
+    # value may carry.
+    supplied = x_e2e_mint_secret.encode("utf-8")
+    expected = _expected_secret().encode("utf-8")
+    if not hmac.compare_digest(supplied, expected):
+        raise HTTPException(status_code=403, detail="invalid e2e mint secret")
+    async with async_session_maker() as session:
+        result = await session.execute(select(User).where(User.email == body.email))
+        user = result.scalar_one_or_none()
+    if user is None:
+        raise HTTPException(
+            status_code=404, detail=f"e2e user {body.email!r} not seeded"
+        )
+    token = await get_jwt_strategy().write_token(user)
+    return MintResponse(access_token=token)
+
+
+def install(app: FastAPI) -> None:
+    """Mount the test-only mint router onto the given FastAPI app.
+
+    Also emits a WARNING-level log line naming the mounted route.
+    """
+    app.include_router(router)
+    _logger.warning("[e2e] mounted POST /__e2e__/auth/token (test-only token mint)")
--- a/surfsense_backend/tests/e2e/fakes/docling_service.py
+++ b/surfsense_backend/tests/e2e/fakes/docling_service.py
@ -0,0 +1,141 @@
+"""Stub DoclingService.process_document for E2E.
+
+The real ``DoclingService.process_document`` calls
+``DocumentConverter.convert(file_path)`` which lazily downloads the
+``docling-project/docling-layout-heron`` model from Hugging Face Hub.
+The hermetic E2E container sets ``HF_HUB_OFFLINE=1`` (see
+``docker/docker-compose.e2e.yml``), so that download fails with
+``LocalEntryNotFoundError`` and the indexing Celery task retries until
+the Playwright test hits its ~4-minute step timeout. In CI that is the
+difference between the suite finishing and the 30-minute job timeout
+killing the run before any report can upload.
+
+Stubbing ``process_document`` bypasses ``DocumentConverter.convert()``
+entirely. ``DoclingService.__init__`` is intentionally left untouched
+because constructing ``DocumentConverter(...)`` is cheap and offline —
+it is only ``.convert()`` that triggers the offline-model download.
+
+Every canary PDF under ``tests/e2e/fakes/fixtures/binary/`` is produced
+by ``generate_canary_pdfs.py`` and embeds its canary token as plain
+``(text) Tj`` PDF text operators. Extracting those operators gives us
+the canary string back, which is what the Playwright assertions look
+for in the resulting Document row.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Matches the `(escaped text) Tj` text-show operator emitted by
+# generate_canary_pdfs.py. Inside the parens, the escape rules are:
+#   \\  -> backslash
+#   \(  -> literal (
+#   \)  -> literal )
+# The character class [^\\()] consumes any non-escape byte; \\. consumes
+# an escape sequence. Sufficient for our synthetic fixtures.
+_TJ_PATTERN = re.compile(rb"\(((?:[^\\()]|\\.)*)\)\s*Tj")
+
+
+# Matches a backslash escape of one of the three characters that appear
+# escaped inside a `(text) Tj` payload: backslash, "(" and ")".
+_PDF_ESCAPE_PATTERN = re.compile(rb"\\([\\()])")
+
+
+def _extract_text_from_synthetic_pdf(file_path: str) -> str:
+    """Pull every ``(text) Tj`` payload out of a fixture PDF in order.
+
+    Args:
+        file_path: Path of the PDF to scan.
+
+    Returns:
+        The unescaped payloads joined with newlines, in file order. An
+        empty string if the file cannot be read or holds no text-show
+        operators.
+
+    We do not try to handle arbitrary PDFs because the fake is only ever
+    invoked against fixtures we generate ourselves.
+    """
+    try:
+        data = Path(file_path).read_bytes()
+    except OSError as exc:
+        logger.warning("[fake-docling] could not read %s: %s", file_path, exc)
+        return ""
+
+    lines: list[str] = []
+    for match in _TJ_PATTERN.finditer(data):
+        # Unescape in one left-to-right pass. Each escape is consumed as a
+        # unit, so `\\` followed by `(` can never be misread as `\(`, and no
+        # placeholder byte is needed that could collide with real payload
+        # bytes.
+        text = _PDF_ESCAPE_PATTERN.sub(rb"\1", match.group(1))
+        try:
+            lines.append(text.decode("utf-8"))
+        except UnicodeDecodeError:
+            # Not valid UTF-8: latin-1 maps every byte, so this cannot fail.
+            lines.append(text.decode("latin-1"))
+    return "\n".join(lines)
+
+
+async def fake_process_document(
+    self,
+    file_path: str,
+    filename: str | None = None,
+) -> dict[str, Any]:
+    """Stand-in for ``DoclingService.process_document`` used by E2E runs.
+
+    Builds a small markdown document from the text-show payloads found in
+    ``file_path`` and returns it in a dict with the keys ``content``,
+    ``full_text``, ``service_used``, ``status`` and ``processing_notes``.
+    The heading is ``filename`` when given, otherwise the file's base name.
+    """
+    body_text = _extract_text_from_synthetic_pdf(file_path)
+    display_name = filename or Path(file_path).name
+
+    if not body_text:
+        # Nothing extracted: return a placeholder body instead of raising,
+        # so the indexing pipeline keeps going and the canary assertion is
+        # what fails.
+        body_text = "(empty docling fake — no text-show operators found)"
+    content = f"# {display_name}\n\n{body_text}\n"
+
+    logger.info(
+        "[fake-docling] returning %d chars for %s",
+        len(content),
+        display_name,
+    )
+
+    result: dict[str, Any] = {
+        "content": content,
+        "full_text": content,
+        "service_used": "docling-fake",
+        "status": "success",
+        "processing_notes": "e2e fake DoclingService — no real PDF parsing",
+    }
+    return result
+
+
+def install(patches: list[Any]) -> None:
+    """Swap ``DoclingService.process_document`` for the E2E fake.
+
+    The replacement is applied on the class, so every instance dispatches
+    to ``fake_process_document``. The started patcher is appended to
+    ``patches`` so the caller keeps a reference to it.
+
+    Raises:
+        RuntimeError: if the target module or attribute cannot be resolved.
+            This is deliberately fatal: a silent passthrough would leave
+            the real Docling in place.
+    """
+    from unittest.mock import patch as _patch
+
+    target = "app.services.docling_service.DoclingService.process_document"
+    patcher = _patch(target, fake_process_document)
+    try:
+        # The target is only imported and resolved once the patch starts.
+        patcher.start()
+    except (ModuleNotFoundError, AttributeError) as exc:
+        raise RuntimeError(
+            f"Could not patch Docling binding {target!r}: {exc!s}. "
+            f"Update surfsense_backend/tests/e2e/fakes/docling_service.py "
+            f"to point at the new binding site."
+        ) from exc
+    else:
+        patches.append(patcher)
+        logger.info("[fake-docling] patched %s", target)
--- a/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
+++ b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
@ -0,0 +1,71 @@
+# Synthetic Global LLM configuration for E2E ONLY.
+#
+# Why this file exists:
+#   surfsense_backend/app/config/global_llm_config.yaml is gitignored
+#   (operators ship real API keys there). In CI that file does not exist,
+#   so app.config.load_global_llm_configs() returns [] and every chat-stream
+#   test fails fast with "No usable global LLM configs are available for
+#   Auto mode", raised by auto_model_pin_service._global_candidates().
+#
+# What this file does:
+#   tests/e2e/run_backend.py and tests/e2e/run_celery.py copy this file
+#   to app/config/global_llm_config.yaml at startup, BEFORE app.config
+#   is imported. The copy lives only inside the E2E Docker container.
+#
+# Why a fake api_key is safe:
+#   tests.e2e.fakes.chat_llm patches
+#     app.tasks.chat.stream_new_chat.create_chat_litellm_from_agent_config
+#     app.tasks.chat.stream_new_chat.create_chat_litellm_from_config
+#   so the resolved auto-pin id is never sent to a real LLM provider.
+#   The values below only need to pass
+#   auto_model_pin_service._is_usable_global_config()
+#   which requires id / model_name / provider / api_key all truthy.
+#
+# Why TWO entries (premium + free):
+#   auto_model_pin_service.resolve_or_get_pinned_llm_config_id() splits
+#   candidates by billing_tier based on _is_premium_eligible(user):
+#     premium_eligible == True  -> keeps only tier=="premium" configs
+#     premium_eligible == False -> keeps only tier!="premium" configs
+#   A single-tier fixture would fail one of the two branches with
+#   "Auto mode could not find an eligible LLM config for this user and
+#   quota state". Shipping one of each guarantees every quota state
+#   resolves to a viable pin in E2E.
+
+router_settings:
+  routing_strategy: "simple-shuffle"
+  num_retries: 0
+  allowed_fails: 1
+  cooldown_time: 1
+
+global_llm_configs:
+  - id: -9001
+    name: "E2E Fake Auto Model (premium)"
+    billing_tier: "premium"
+    anonymous_enabled: false
+    seo_enabled: false
+    quality_score: 1.0
+    provider: "OPENAI"
+    model_name: "fake-e2e-model-premium"
+    api_key: "fake-e2e-api-key-not-for-production"
+    supports_image_input: false
+    quota_reserve_tokens: 1024
+    rpm: 1000
+    tpm: 100000
+    litellm_params:
+      model: "openai/fake-e2e-model-premium"
+
+  - id: -9002
+    name: "E2E Fake Auto Model (free)"
+    billing_tier: "free"
+    anonymous_enabled: false
+    seo_enabled: false
+    quality_score: 1.0
+    provider: "OPENAI"
+    model_name: "fake-e2e-model-free"
+    api_key: "fake-e2e-api-key-not-for-production"
+    supports_image_input: false
+    quota_reserve_tokens: 1024
+    rpm: 1000
+    tpm: 100000
+    litellm_params:
+      model: "openai/fake-e2e-model-free"
--- a/surfsense_backend/tests/e2e/run_backend.py
+++ b/surfsense_backend/tests/e2e/run_backend.py
@ -23,15 +23,12 @@ Usage:

 from __future__ import annotations

+import asyncio
 import logging
 import os
 import sys

-# ---------------------------------------------------------------------------
-# 1) Hijack sys.modules BEFORE any production import.
-#    Production: composio_service.py:11 does `from composio import Composio`.
-#    With this hijack in place, that import resolves to our strict fake.
-# ---------------------------------------------------------------------------
+import uvicorn

 # Make the surfsense_backend root importable as a top-level package so
 # `import tests.e2e.fakes...` works regardless of how the entrypoint is
@ -42,97 +39,175 @@ _BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
 if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)

-import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
-import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402

-sys.modules["composio"] = _fake_composio
-sys.modules["notion_client"] = _fake_notion
-sys.modules["notion_client.errors"] = _fake_notion.errors
-
-
-# ---------------------------------------------------------------------------
-# 2) Standard logging + dotenv so the rest of the app behaves like main.py.
-# ---------------------------------------------------------------------------
-
-from dotenv import load_dotenv  # noqa: E402
-
-load_dotenv()
-os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
-os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
-os.environ.setdefault(
-    "CONFLUENCE_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/confluence/connector/callback",
-)
-os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
-os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
-os.environ.setdefault(
-    "NOTION_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/notion/connector/callback",
-)
-os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
-os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
-os.environ.setdefault(
-    "ONEDRIVE_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
-)
-os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
-os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
-os.environ.setdefault(
-    "DROPBOX_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
-)
-os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
-os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S",
-)
 logger = logging.getLogger("surfsense.e2e.backend")
-logger.warning(
-    "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***"
-)
-
-
-# ---------------------------------------------------------------------------
-# 3) Now import the production app. Every module in app.* loads here,
-#    creating their bindings (some of which we will patch in step 4).
-# ---------------------------------------------------------------------------
-
-# ---------------------------------------------------------------------------
-# 4) Patch LLM + embedding bindings at every consumer site.
-#    Composio is already covered by the sys.modules hijack in step 1.
-# ---------------------------------------------------------------------------
-from unittest.mock import patch  # noqa: E402
-
-from app.app import app  # noqa: E402
-from tests.e2e.fakes import (  # noqa: E402
-    clickup_module as _fake_clickup_module,
-    confluence_indexer as _fake_confluence_indexer,
-    confluence_oauth as _fake_confluence_oauth,
-    dropbox_api as _fake_dropbox_api,
-    embeddings as _fake_embeddings,
-    jira_module as _fake_jira_module,
-    linear_module as _fake_linear_module,
-    mcp_oauth_runtime as _fake_mcp_oauth_runtime,
-    mcp_runtime as _fake_mcp_runtime,
-    native_google as _fake_native_google,
-    notion_module as _fake_notion_module,
-    onedrive_graph as _fake_onedrive_graph,
-    slack_module as _fake_slack_module,
-)
-from tests.e2e.fakes.chat_llm import (  # noqa: E402
-    fake_create_chat_litellm_from_agent_config,
-    fake_create_chat_litellm_from_config,
-)
-from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402

+# Patches started during bootstrap are kept alive for the lifetime of the
+# process. We never call .stop() on them.
 _active_patches: list = []


+def _hijack_external_sdks() -> None:
+    """Point the ``composio`` and ``notion_client`` imports at the E2E fakes.
+
+    Production modules bind these SDKs at import time (``from composio
+    import Composio``, ``import notion_client``), so the fake modules have
+    to be registered in ``sys.modules`` beforehand.
+
+    MUST run before _import_production_app().
+    """
+    import tests.e2e.fakes.composio_module as _fake_composio
+    import tests.e2e.fakes.notion_module as _fake_notion
+
+    replacements = {
+        "composio": _fake_composio,
+        "notion_client": _fake_notion,
+        "notion_client.errors": _fake_notion.errors,
+    }
+    sys.modules.update(replacements)
+
+
+def _load_dotenv_and_set_env_defaults() -> None:
+    """Load .env and set every env var the production config reads on import.
+
+    MUST run before _import_production_app(), since app.config consumes
+    these values at import time.
+    """
+    from dotenv import load_dotenv
+
+    load_dotenv()
+
+    os.environ.setdefault(
+        "DATABASE_URL",
+        "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
+    )
+    os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
+    os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
+    os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
+    os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
+    os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
+    os.environ.setdefault("AUTH_TYPE", "LOCAL")
+    os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
+    os.environ.setdefault("ETL_SERVICE", "DOCLING")
+    os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+    os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
+
+    # Sentinel API keys — the fakes never read them; any real call that
+    # leaks past the fakes should come back as a 401.
+    os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
+    os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
+
+    os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
+    os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
+    os.environ.setdefault(
+        "CONFLUENCE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/confluence/connector/callback",
+    )
+    os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
+    os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
+    os.environ.setdefault(
+        "NOTION_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/notion/connector/callback",
+    )
+    os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
+    os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
+    os.environ.setdefault(
+        "ONEDRIVE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
+    )
+    os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
+    os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
+    os.environ.setdefault(
+        "DROPBOX_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
+    )
+    # Native Google OAuth — the fake Flow in tests.e2e.fakes.native_google
+    # raises "Fake Google Flow requires redirect_uri." if these are empty.
+    # Without these defaults the connector/add routes return 500 in CI,
+    # where no .env supplies them.
+    os.environ.setdefault(
+        "GOOGLE_DRIVE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/drive/connector/callback",
+    )
+    os.environ.setdefault(
+        "GOOGLE_GMAIL_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
+    )
+    os.environ.setdefault(
+        "GOOGLE_CALENDAR_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
+    )
+    os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
+    os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
+
+
+def _install_synthetic_global_llm_config() -> None:
+    """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.
+
+    The real file is gitignored (production operators ship their own with
+    real API keys), so a fresh CI checkout has no YAML at the path
+    ``app.config.load_global_llm_configs()`` reads. With an empty
+    ``GLOBAL_LLM_CONFIGS`` list, ``auto_model_pin_service`` raises
+    ``"No usable global LLM configs are available for Auto mode"`` on
+    every chat-stream request.
+
+    We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
+    production-expected location BEFORE ``_import_production_app()`` so
+    ``app.config`` picks it up on import. Production code is untouched —
+    this is purely a test-time scaffold.
+
+    Only installs when the destination is missing. A developer running
+    the E2E entrypoint locally keeps their real ``global_llm_config.yaml``
+    intact (the patched ``create_chat_litellm_from_*`` factories make the
+    actual model values irrelevant either way).
+
+    MUST run before _import_production_app().
+    """
+    import shutil
+
+    src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
+    dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml")
+
+    if not os.path.exists(src):
+        raise RuntimeError(
+            f"E2E synthetic global LLM config fixture missing at {src!r}. "
+            f"This file is checked into tests/e2e/fixtures/ — if it has gone "
+            f"missing, restore it from VCS before running the E2E entrypoint."
+        )
+
+    if os.path.exists(dst):
+        logger.info(
+            "[e2e-global-llm-config] %s already exists; leaving it alone "
+            "(local dev config preserved)",
+            dst,
+        )
+        return
+
+    os.makedirs(os.path.dirname(dst), exist_ok=True)
+    shutil.copyfile(src, dst)
+    logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
+
+
+def _import_production_app():
+    """Import and return the production FastAPI app.
+
+    Every module under ``app.*`` loads here, creating their bindings.
+    The LLM factories captured at this point are replaced afterwards by
+    _patch_llm_bindings(); the embeddings fake and the other runtime
+    fakes are installed by _install_runtime_fakes().
+    """
+    from app.app import app as production_app
+
+    return production_app
+
+
 def _patch_llm_bindings() -> None:
    """Replace LLM factories at every known binding site."""
+    from unittest.mock import patch
+
+    from tests.e2e.fakes.chat_llm import (
+        fake_create_chat_litellm_from_agent_config,
+        fake_create_chat_litellm_from_config,
+    )
+    from tests.e2e.fakes.llm import fake_get_user_long_context_llm
+
    targets = [
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
@ -190,38 +265,90 @@ def _patch_llm_bindings() -> None:
            logger.warning("[fake-chat-llm] could not patch %s: %s.", target, exc)


-_patch_llm_bindings()
-_fake_embeddings.install(_active_patches)
-_fake_confluence_oauth.install(_active_patches)
-_fake_confluence_indexer.install(_active_patches)
-_fake_native_google.install(_active_patches)
-_fake_onedrive_graph.install(_active_patches)
-_fake_dropbox_api.install(_active_patches)
-_fake_notion_module.install(_active_patches)
-_fake_linear_module.install(_active_patches)
-_fake_jira_module.install(_active_patches)
-_fake_clickup_module.install(_active_patches)
-_fake_mcp_runtime.install(_active_patches)
-_fake_mcp_oauth_runtime.install(_active_patches)
-_fake_slack_module.install(_active_patches)
+def _install_runtime_fakes() -> None:
+    """Run each fake's install() against the active patch stack."""
+    from tests.e2e.fakes import (
+        clickup_module as _fake_clickup_module,
+        confluence_indexer as _fake_confluence_indexer,
+        confluence_oauth as _fake_confluence_oauth,
+        docling_service as _fake_docling_service,
+        dropbox_api as _fake_dropbox_api,
+        embeddings as _fake_embeddings,
+        jira_module as _fake_jira_module,
+        linear_module as _fake_linear_module,
+        mcp_oauth_runtime as _fake_mcp_oauth_runtime,
+        mcp_runtime as _fake_mcp_runtime,
+        native_google as _fake_native_google,
+        notion_module as _fake_notion_module,
+        onedrive_graph as _fake_onedrive_graph,
+        slack_module as _fake_slack_module,
+    )
+
+    _fake_embeddings.install(_active_patches)
+    _fake_docling_service.install(_active_patches)
+    _fake_confluence_oauth.install(_active_patches)
+    _fake_confluence_indexer.install(_active_patches)
+    _fake_native_google.install(_active_patches)
+    _fake_onedrive_graph.install(_active_patches)
+    _fake_dropbox_api.install(_active_patches)
+    _fake_notion_module.install(_active_patches)
+    _fake_linear_module.install(_active_patches)
+    _fake_jira_module.install(_active_patches)
+    _fake_clickup_module.install(_active_patches)
+    _fake_mcp_runtime.install(_active_patches)
+    _fake_mcp_oauth_runtime.install(_active_patches)
+    _fake_slack_module.install(_active_patches)


-# ---------------------------------------------------------------------------
-# 5) Mount test-only middleware. Production never reaches this code.
-# ---------------------------------------------------------------------------
+def _install_test_only_app_extensions(app) -> None:
+    """Mount test-only middleware + the /__e2e__ token mint router.

-from tests.e2e.middleware.scenario import ScenarioMiddleware  # noqa: E402
+    POST /__e2e__/auth/token bypasses /auth/jwt/login's 5/min/IP rate
+    limit so Playwright workers can authenticate without thrashing the
+    production auth surface. See tests/e2e/auth_mint.py.
+    """
+    from tests.e2e.auth_mint import install as install_e2e_mint
+    from tests.e2e.middleware.scenario import ScenarioMiddleware

-app.add_middleware(ScenarioMiddleware)
+    app.add_middleware(ScenarioMiddleware)
+    install_e2e_mint(app)


-# ---------------------------------------------------------------------------
-# 6) Start uvicorn, mirroring main.py's behaviour.
-# ---------------------------------------------------------------------------
+def _bootstrap():
+    """Run the full E2E bootstrap and return the production FastAPI app.

-import asyncio  # noqa: E402
+    Ordering is load-bearing:
+      1) Hijack composio + notion_client in sys.modules.
+      2) Load .env + set env defaults (app.config reads env on import).
+      3) Configure logging.
+      4) Materialise the synthetic global_llm_config.yaml so Auto-mode
+         pin resolution finds at least one usable candidate.
+      5) Import production app (which transitively imports the now-faked
+         external SDKs and reads the env defaults + YAML).
+      6) Patch LLM factories at every known binding site.
+      7) Install runtime fakes (embeddings, Docling, connectors, MCP,
+         Slack) via each fake's install().
+      8) Mount test-only middleware + /__e2e__ routes onto the app.
+    """
+    _hijack_external_sdks()
+    _load_dotenv_and_set_env_defaults()

-import uvicorn  # noqa: E402
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    logger.warning(
+        "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***"
+    )
+
+    _install_synthetic_global_llm_config()
+    production_app = _import_production_app()
+    _patch_llm_bindings()
+    _install_runtime_fakes()
+    _install_test_only_app_extensions(production_app)
+    return production_app
+
+
+app = _bootstrap()


 def _main() -> None:
--- a/surfsense_backend/tests/e2e/run_celery.py
+++ b/surfsense_backend/tests/e2e/run_celery.py
@ -25,96 +25,166 @@ if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)


-# ---------------------------------------------------------------------------
-# 1) Hijack sys.modules BEFORE production celery imports anything.
-# ---------------------------------------------------------------------------
-
-import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
-import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402
-
-sys.modules["composio"] = _fake_composio
-sys.modules["notion_client"] = _fake_notion
-sys.modules["notion_client.errors"] = _fake_notion.errors
-
-
-# ---------------------------------------------------------------------------
-# 2) Logging + dotenv.
-# ---------------------------------------------------------------------------
-
-from dotenv import load_dotenv  # noqa: E402
-
-load_dotenv()
-os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
-os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
-os.environ.setdefault(
-    "CONFLUENCE_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/confluence/connector/callback",
-)
-os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
-os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
-os.environ.setdefault(
-    "NOTION_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/notion/connector/callback",
-)
-os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
-os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
-os.environ.setdefault(
-    "ONEDRIVE_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
-)
-os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
-os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
-os.environ.setdefault(
-    "DROPBOX_REDIRECT_URI",
-    "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
-)
-os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
-os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S",
-)
 logger = logging.getLogger("surfsense.e2e.celery")
-logger.warning("*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***")
-
-
-# ---------------------------------------------------------------------------
-# 3) Import the production celery_app. All task modules load here.
-# ---------------------------------------------------------------------------
-
-# ---------------------------------------------------------------------------
-# 4) Patch LLM + embedding bindings inside the worker process.
-# ---------------------------------------------------------------------------
-from unittest.mock import patch  # noqa: E402
-
-from app.celery_app import celery_app  # noqa: E402
-from tests.e2e.fakes import (  # noqa: E402
-    clickup_module as _fake_clickup_module,
-    confluence_indexer as _fake_confluence_indexer,
-    confluence_oauth as _fake_confluence_oauth,
-    dropbox_api as _fake_dropbox_api,
-    embeddings as _fake_embeddings,
-    jira_module as _fake_jira_module,
-    linear_module as _fake_linear_module,
-    mcp_oauth_runtime as _fake_mcp_oauth_runtime,
-    mcp_runtime as _fake_mcp_runtime,
-    native_google as _fake_native_google,
-    notion_module as _fake_notion_module,
-    onedrive_graph as _fake_onedrive_graph,
-    slack_module as _fake_slack_module,
-)
-from tests.e2e.fakes.chat_llm import (  # noqa: E402
-    fake_create_chat_litellm_from_agent_config,
-    fake_create_chat_litellm_from_config,
-)
-from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402

+# Patches started during bootstrap are kept alive for the lifetime of the
+# process. We never call .stop() on them.
 _active_patches: list = []


+def _hijack_external_sdks() -> None:
+    """Replace composio + notion_client in sys.modules.
+
+    Production does ``from composio import Composio`` and
+    ``import notion_client`` at import time. With this hijack in place,
+    those imports resolve to our strict fakes.
+
+    MUST run before _import_celery_app().
+    """
+    import tests.e2e.fakes.composio_module as _fake_composio
+    import tests.e2e.fakes.notion_module as _fake_notion
+
+    sys.modules["composio"] = _fake_composio
+    sys.modules["notion_client"] = _fake_notion
+    sys.modules["notion_client.errors"] = _fake_notion.errors
+
+
+def _load_dotenv_and_set_env_defaults() -> None:
+    """Load .env and set every env var the production config reads on import.
+
+    MUST run before _import_celery_app(), since app.config consumes
+    these values at import time.
+    """
+    from dotenv import load_dotenv
+
+    load_dotenv()
+
+    os.environ.setdefault(
+        "DATABASE_URL",
+        "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
+    )
+    os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
+    os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
+    os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
+    os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
+    os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
+    os.environ.setdefault("AUTH_TYPE", "LOCAL")
+    os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
+    os.environ.setdefault("ETL_SERVICE", "DOCLING")
+    os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+    os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
+
+    # Sentinel API keys — the fakes never read them; any real call that
+    # leaks past the fakes should come back as a 401.
+    os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
+    os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
+    os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
+
+    os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
+    os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
+    os.environ.setdefault(
+        "CONFLUENCE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/confluence/connector/callback",
+    )
+    os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
+    os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
+    os.environ.setdefault(
+        "NOTION_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/notion/connector/callback",
+    )
+    os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
+    os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
+    os.environ.setdefault(
+        "ONEDRIVE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
+    )
+    os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
+    os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
+    os.environ.setdefault(
+        "DROPBOX_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
+    )
+    # Native Google OAuth — fake Flow in tests.e2e.fakes.native_google raises
+    # "Fake Google Flow requires redirect_uri." when these are empty.
+    os.environ.setdefault(
+        "GOOGLE_DRIVE_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/drive/connector/callback",
+    )
+    os.environ.setdefault(
+        "GOOGLE_GMAIL_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
+    )
+    os.environ.setdefault(
+        "GOOGLE_CALENDAR_REDIRECT_URI",
+        "http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
+    )
+    os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
+    os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
+
+
+def _install_synthetic_global_llm_config() -> None:
+    """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.
+
+    The real file is gitignored (production operators ship their own with
+    real API keys), so a fresh CI checkout has no YAML at the path
+    ``app.config.load_global_llm_configs()`` reads. With an empty
+    ``GLOBAL_LLM_CONFIGS`` list, the worker's view of the config diverges
+    from the API container.
+
+    We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
+    production-expected location BEFORE _import_celery_app() so
+    ``app.config`` picks it up on import. Install-only-if-missing so a
+    developer's local config (with real API keys) is preserved.
+
+    MUST run before _import_celery_app().
+    """
+    import shutil
+
+    src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
+    dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml")
+
+    if not os.path.exists(src):
+        raise RuntimeError(
+            f"E2E synthetic global LLM config fixture missing at {src!r}. "
+            f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS."
+        )
+
+    if os.path.exists(dst):
+        logger.info(
+            "[e2e-global-llm-config] %s already exists; leaving it alone "
+            "(local dev config preserved)",
+            dst,
+        )
+        return
+
+    os.makedirs(os.path.dirname(dst), exist_ok=True)
+    shutil.copyfile(src, dst)
+    logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
+
+
+def _import_celery_app():
+    """Import and return the production Celery app.
+
+    Every module under ``app.*`` (including all task modules) loads here,
+    creating their bindings. The LLM factories captured at this point are
+    replaced afterwards by _patch_llm_bindings(); the embeddings fake and
+    the other runtime fakes are installed by _install_runtime_fakes().
+    """
+    from app.celery_app import celery_app
+
+    return celery_app
+
+
 def _patch_llm_bindings() -> None:
+    """Replace LLM factories at every known binding site in worker tasks."""
+    from unittest.mock import patch
+
+    from tests.e2e.fakes.chat_llm import (
+        fake_create_chat_litellm_from_agent_config,
+        fake_create_chat_litellm_from_config,
+    )
+    from tests.e2e.fakes.llm import fake_get_user_long_context_llm
+
    targets = [
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
@ -172,38 +242,93 @@ def _patch_llm_bindings() -> None:
            )


-_patch_llm_bindings()
-_fake_embeddings.install(_active_patches)
-_fake_confluence_oauth.install(_active_patches)
-_fake_confluence_indexer.install(_active_patches)
-_fake_native_google.install(_active_patches)
-_fake_onedrive_graph.install(_active_patches)
-_fake_dropbox_api.install(_active_patches)
-_fake_notion_module.install(_active_patches)
-_fake_linear_module.install(_active_patches)
-_fake_jira_module.install(_active_patches)
-_fake_clickup_module.install(_active_patches)
-_fake_mcp_runtime.install(_active_patches)
-_fake_mcp_oauth_runtime.install(_active_patches)
-_fake_slack_module.install(_active_patches)
+def _install_runtime_fakes() -> None:
+    """Run each fake's install() against the active patch stack."""
+    from tests.e2e.fakes import (
+        clickup_module as _fake_clickup_module,
+        confluence_indexer as _fake_confluence_indexer,
+        confluence_oauth as _fake_confluence_oauth,
+        docling_service as _fake_docling_service,
+        dropbox_api as _fake_dropbox_api,
+        embeddings as _fake_embeddings,
+        jira_module as _fake_jira_module,
+        linear_module as _fake_linear_module,
+        mcp_oauth_runtime as _fake_mcp_oauth_runtime,
+        mcp_runtime as _fake_mcp_runtime,
+        native_google as _fake_native_google,
+        notion_module as _fake_notion_module,
+        onedrive_graph as _fake_onedrive_graph,
+        slack_module as _fake_slack_module,
+    )
+
+    _fake_embeddings.install(_active_patches)
+    _fake_docling_service.install(_active_patches)
+    _fake_confluence_oauth.install(_active_patches)
+    _fake_confluence_indexer.install(_active_patches)
+    _fake_native_google.install(_active_patches)
+    _fake_onedrive_graph.install(_active_patches)
+    _fake_dropbox_api.install(_active_patches)
+    _fake_notion_module.install(_active_patches)
+    _fake_linear_module.install(_active_patches)
+    _fake_jira_module.install(_active_patches)
+    _fake_clickup_module.install(_active_patches)
+    _fake_mcp_runtime.install(_active_patches)
+    _fake_mcp_oauth_runtime.install(_active_patches)
+    _fake_slack_module.install(_active_patches)


-# ---------------------------------------------------------------------------
-# 5) Start the worker.
-# ---------------------------------------------------------------------------
+def _bootstrap():
+    """Run the full E2E bootstrap and return the production Celery app.
+
+    Ordering is load-bearing:
+      1) Hijack composio + notion_client in sys.modules.
+      2) Load .env + set env defaults (app.config reads env on import).
+      3) Configure logging.
+      4) Materialise the synthetic global_llm_config.yaml so the worker's
+         view of GLOBAL_LLM_CONFIGS matches the API container.
+      5) Import production celery_app (which transitively imports the
+         now-faked external SDKs and reads the env defaults + YAML).
+      6) Patch LLM factories at every known binding site in worker tasks.
+      7) Install runtime fakes (embeddings, Docling, connectors, MCP,
+         Slack) via each fake's install().
+    """
+    _hijack_external_sdks()
+    _load_dotenv_and_set_env_defaults()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    logger.warning(
+        "*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***"
+    )
+
+    _install_synthetic_global_llm_config()
+    celery_app = _import_celery_app()
+    _patch_llm_bindings()
+    _install_runtime_fakes()
+    return celery_app
+
+
+celery_app = _bootstrap()


 def _main() -> None:
-    # Default queues mirror production (default queue + connectors queue
-    # so Drive indexing tasks are picked up).
    queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
    queues = f"{queue_name},{queue_name}.connectors"
+
+    # On macOS, forking after MPS initialisation crashes prefork workers;
+    # the threads pool avoids it.
+    default_pool = "threads" if sys.platform == "darwin" else "prefork"
+    pool = os.getenv("CELERY_POOL", default_pool)
+    concurrency = os.getenv("CELERY_CONCURRENCY", "2")
+
    celery_app.worker_main(
        argv=[
            "worker",
            "--loglevel=info",
            f"--queues={queues}",
-            "--concurrency=2",
+            f"--pool={pool}",
+            f"--concurrency={concurrency}",
            "--without-gossip",
            "--without-mingle",
        ]
--- a/surfsense_desktop/package.json
+++ b/surfsense_desktop/package.json
@ -21,7 +21,7 @@
    "email": "rohan@surfsense.com"
  },
  "license": "MIT",
-  "packageManager": "pnpm@10.24.0",
+  "packageManager": "pnpm@10.26.0",
  "devDependencies": {
    "@electron/rebuild": "^4.0.3",
    "@types/node": "^25.5.0",
--- a/surfsense_web/.gitignore
+++ b/surfsense_web/.gitignore
@ -12,6 +12,10 @@

 # testing
 /coverage
+/playwright/.auth/
+/playwright-report/
+/test-results/
+/blob-report/

 # next.js
 /.next/
@ -48,5 +52,4 @@ next-env.d.ts
 # source
 /.source/

-.pnpm-store/
-
+.pnpm-store/
--- a/surfsense_web/Dockerfile
+++ b/surfsense_web/Dockerfile
@ -12,7 +12,7 @@ WORKDIR /app
 RUN corepack enable pnpm

 # Copy package files
-COPY package.json pnpm-lock.yaml* .npmrc* ./
+COPY package.json pnpm-lock.yaml* pnpm-workspace.yaml* .npmrc* ./

 # First copy the config file and content to avoid fumadocs-mdx postinstall error
 COPY source.config.ts ./
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -208,7 +208,10 @@ const MentionedDocumentInfoSchema = z.object({
 	id: z.number(),
 	title: z.string(),
 	document_type: z.string(),
-	kind: z.union([z.literal("doc"), z.literal("folder")]).optional().default("doc"),
+	kind: z
+		.union([z.literal("doc"), z.literal("folder")])
+		.optional()
+		.default("doc"),
 });

 const MentionedDocumentsPartSchema = z.object({
@ -1029,9 +1032,7 @@ export default function NewChatPage() {
 							mentioned_surfsense_doc_ids: hasSurfsenseDocIds
 								? mentionedDocumentIds.surfsense_doc_ids
 								: undefined,
-							mentioned_folder_ids: hasFolderIds
-								? mentionedDocumentIds.folder_ids
-								: undefined,
+							mentioned_folder_ids: hasFolderIds ? mentionedDocumentIds.folder_ids : undefined,
 							// Full mention metadata (docs + folders, with
 							// ``kind`` discriminator) so the BE can embed a
 							// ``mentioned-documents`` ContentPart on the
@ -1900,12 +1901,10 @@ export default function NewChatPage() {
 					filesystem_mode: selection.filesystem_mode,
 					client_platform: selection.client_platform,
 					local_filesystem_mounts: selection.local_filesystem_mounts,
-					mentioned_document_ids:
-						regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
+					mentioned_document_ids: regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
 					mentioned_surfsense_doc_ids:
 						regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined,
-					mentioned_folder_ids:
-						regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
+					mentioned_folder_ids: regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
 					// Full mention metadata for the regenerate-specific
 					// source list. Only meaningful for edit (the BE only
 					// re-persists a user row when ``user_query`` is set);
--- a/surfsense_web/atoms/chat/mentioned-documents.atom.ts
+++ b/surfsense_web/atoms/chat/mentioned-documents.atom.ts
@ -97,9 +97,7 @@ export const mentionedDocumentIdsAtom = atom((get) => {
 		surfsense_doc_ids: docs
 			.filter((doc) => doc.document_type === "SURFSENSE_DOCS")
 			.map((doc) => doc.id),
-		document_ids: docs
-			.filter((doc) => doc.document_type !== "SURFSENSE_DOCS")
-			.map((doc) => doc.id),
+		document_ids: docs.filter((doc) => doc.document_type !== "SURFSENSE_DOCS").map((doc) => doc.id),
 		folder_ids: folders.map((f) => f.id),
 	};
 });
--- a/surfsense_web/biome.json
+++ b/surfsense_web/biome.json
@ -7,7 +7,19 @@
 	},
 	"files": {
 		"ignoreUnknown": true,
-		"includes": ["**", "!!node_modules", "!!.git", "!!.next", "!!dist", "!!build", "!!coverage"],
+		"includes": [
+			"**",
+			"!!node_modules",
+			"!!.git",
+			"!!.next",
+			"!!dist",
+			"!!build",
+			"!!coverage",
+			"!!test-results",
+			"!!playwright-report",
+			"!!blob-report",
+			"!!playwright/.auth"
+		],
 		"maxSize": 1048576
 	},
 	"formatter": {
--- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx
+++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx
@ -47,10 +47,7 @@ export interface InlineMentionEditorRef {
 	setText: (text: string) => void;
 	getText: () => string;
 	getMentionedDocuments: () => MentionedDocument[];
-	insertMentionChip: (
-		mention: MentionChipInput,
-		options?: { removeTriggerText?: boolean }
-	) => void;
+	insertMentionChip: (mention: MentionChipInput, options?: { removeTriggerText?: boolean }) => void;
 	/**
 	 * @deprecated Use ``insertMentionChip``. Kept for one transition
 	 * cycle so we don't break ad-hoc callers; prefer the new name.
@ -364,8 +361,7 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
 				const selection = editor.selection;
 				const kind: MentionKind = mention.kind ?? "doc";
 				const document_type =
-					mention.document_type ??
-					(kind === "folder" ? FOLDER_MENTION_DOCUMENT_TYPE : undefined);
+					mention.document_type ?? (kind === "folder" ? FOLDER_MENTION_DOCUMENT_TYPE : undefined);
 				const mentionNode: MentionElementNode = {
 					type: MENTION_TYPE,
 					id: mention.id,
--- a/surfsense_web/components/assistant-ui/markdown-text.tsx
+++ b/surfsense_web/components/assistant-ui/markdown-text.tsx
@ -33,8 +33,8 @@ import {
 } from "@/components/ui/table";
 import { useElectronAPI } from "@/hooks/use-platform";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
-import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
 import { getVirtualPathDisplay } from "@/lib/chat/virtual-path-display";
+import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
 import { cn } from "@/lib/utils";

 function MarkdownCodeBlockSkeleton() {
@ -222,11 +222,7 @@ function FilePathLink({ path, className }: { path: string; className?: string })
 		: undefined;

 	const { displayName, isFolder } = getVirtualPathDisplay(path);
-	const icon = isFolder ? (
-		<FolderIcon className="size-3.5" />
-	) : (
-		<FileIcon className="size-3.5" />
-	);
+	const icon = isFolder ? <FolderIcon className="size-3.5" /> : <FileIcon className="size-3.5" />;

 	const handleClick = useCallback(
 		(event: React.MouseEvent<HTMLButtonElement>) => {
--- a/surfsense_web/components/assistant-ui/user-message.tsx
+++ b/surfsense_web/components/assistant-ui/user-message.tsx
@ -111,11 +111,7 @@ const UserTextPart: FC = () => {
 						icon={icon}
 						label={segment.doc.title}
 						tooltip={isFolder ? `Folder: ${segment.doc.title}` : segment.doc.title}
-						onClick={
-							isFolder
-								? undefined
-								: () => handleOpenDoc(segment.doc.id, segment.doc.title)
-						}
+						onClick={isFolder ? undefined : () => handleOpenDoc(segment.doc.id, segment.doc.title)}
 						className="mx-0.5"
 					/>
 				);
--- a/surfsense_web/components/editor/plate-editor.tsx
+++ b/surfsense_web/components/editor/plate-editor.tsx
@ -170,16 +170,10 @@ export function PlateEditor({
 			: markdown
 				? (editor) => {
 						if (!enableCitations) {
-							return safeDeserializeMarkdown(
-								editor,
-								escapeMdxExpressions(markdown)
-							) as Value;
+							return safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown)) as Value;
 						}
 						const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
-						const value = safeDeserializeMarkdown(
-							editor,
-							escapeMdxExpressions(rewritten)
-						);
+						const value = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten));
 						return injectCitationNodes(value, urlMap) as Value;
 					}
 				: undefined,
@ -203,10 +197,7 @@ export function PlateEditor({
 			let newValue: Descendant[];
 			if (enableCitations) {
 				const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
-				const deserialized = safeDeserializeMarkdown(
-					editor,
-					escapeMdxExpressions(rewritten)
-				);
+				const deserialized = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten));
 				newValue = injectCitationNodes(deserialized, urlMap);
 			} else {
 				newValue = safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown));
--- a/surfsense_web/components/editor/utils/safe-deserialize.ts
+++ b/surfsense_web/components/editor/utils/safe-deserialize.ts
@ -49,10 +49,7 @@ export function safeDeserializeMarkdown(
 		return api.deserialize(markdown, { remarkPlugins: STRICT_PLUGINS }) as Descendant[];
 	} catch (mdxError) {
 		if (process.env.NODE_ENV !== "production") {
-			console.warn(
-				"[plate-editor] MDX parse failed, retrying without remark-mdx:",
-				mdxError
-			);
+			console.warn("[plate-editor] MDX parse failed, retrying without remark-mdx:", mdxError);
 		}
 		try {
 			return api.deserialize(markdown, { remarkPlugins: LENIENT_PLUGINS }) as Descendant[];
--- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx
@ -24,10 +24,7 @@ import type React from "react";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { toast } from "sonner";
 import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom";
-import {
-	makeFolderMention,
-	mentionedDocumentsAtom,
-} from "@/atoms/chat/mentioned-documents.atom";
+import { makeFolderMention, mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom";
 import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
 import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms";
 import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
--- a/surfsense_web/components/new-chat/document-mention-picker.tsx
+++ b/surfsense_web/components/new-chat/document-mention-picker.tsx
@ -301,8 +301,7 @@ export const DocumentMentionPicker = forwardRef<
 	// folder entries lift the existing kind-aware key so the same
 	// matchers used by the chip atom apply unchanged.
 	const selectedKeys = useMemo(
-		() =>
-			new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
+		() => new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
 		[initialSelectedDocuments]
 	);

@ -583,9 +582,7 @@ export const DocumentMentionPicker = forwardRef<
 								{(surfsenseDocsList.length > 0 || userDocsList.length > 0) && (
 									<div className="mx-2 my-4 border-t border-border dark:border-white/5" />
 								)}
-								<div className="px-3 py-2 text-xs font-bold text-muted-foreground/55">
-									Folders
-								</div>
+								<div className="px-3 py-2 text-xs font-bold text-muted-foreground/55">Folders</div>
 								{folderMentions.map((folder) => {
 									const folderKey = getMentionDocKey(folder);
 									const isAlreadySelected = selectedKeys.has(folderKey);
--- a/surfsense_web/package.json
+++ b/surfsense_web/package.json
@ -2,6 +2,7 @@
 	"name": "surfsense_web",
 	"version": "0.0.23",
 	"private": true,
+	"packageManager": "pnpm@10.26.0",
 	"description": "SurfSense Frontend",
 	"scripts": {
 		"dev": "next dev --turbopack",
@ -20,6 +21,7 @@
 		"db:studio": "drizzle-kit studio",
 		"format:fix": "npx @biomejs/biome check --fix",
 		"test:e2e": "playwright test",
+		"test:e2e:prod": "cross-env CI=1 playwright test",
 		"test:e2e:ui": "playwright test --ui",
 		"test:e2e:headed": "playwright test --headed",
 		"test:e2e:debug": "playwright test --debug",
--- a/surfsense_web/playwright.config.ts
+++ b/surfsense_web/playwright.config.ts
@ -4,6 +4,11 @@ const PORT = process.env.PORT || "3000";
 const BACKEND_PORT = process.env.BACKEND_PORT || "8000";
 const baseURL = process.env.PLAYWRIGHT_BASE_URL || `http://localhost:${PORT}`;

+process.env.PLAYWRIGHT_TEST_EMAIL ??= "e2e-test@surfsense.net";
+process.env.PLAYWRIGHT_TEST_PASSWORD ??= "E2eTestPassword123!";
+process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ??= `http://localhost:${BACKEND_PORT}`;
+process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE ??= "LOCAL";
+
 /**
 * Playwright configuration for SurfSense web E2E tests.
 *
@ -22,8 +27,8 @@ export default defineConfig({
 	expect: { timeout: 15_000 },
 	fullyParallel: true,
 	forbidOnly: !!process.env.CI,
-	retries: process.env.CI ? 2 : 0,
-	workers: process.env.CI ? 1 : undefined,
+	retries: process.env.CI ? 1 : 0,
+	workers: 1,
 	reporter: process.env.CI
 		? [["html", { open: "never" }], ["github"], ["list"]]
 		: [["html", { open: "on-failure" }], ["list"]],
@ -31,7 +36,7 @@ export default defineConfig({
 		baseURL,
 		trace: "on-first-retry",
 		screenshot: "only-on-failure",
-		video: "retain-on-failure",
+		video: process.env.CI ? "off" : "retain-on-failure",
 		extraHTTPHeaders: {
 			"x-playwright-test": "true",
 		},
@ -53,14 +58,16 @@ export default defineConfig({
 	webServer: process.env.PLAYWRIGHT_NO_WEB_SERVER
 		? undefined
 		: {
-				// Pin to webpack dev (Turbopack has caused stale-lock panics in E2E).
-				command: "pnpm exec next dev",
+				// Local stays on webpack dev (Turbopack caused stale-lock panics in E2E).
+				command: process.env.CI ? "pnpm build && pnpm start" : "pnpm exec next dev",
 				url: `http://localhost:${PORT}`,
 				reuseExistingServer: !process.env.CI,
-				timeout: 180_000,
+				timeout: process.env.CI ? 300_000 : 180_000,
+				stdout: "pipe",
+				stderr: "pipe",
 				env: {
-					NEXT_PUBLIC_FASTAPI_BACKEND_URL: `http://localhost:${BACKEND_PORT}`,
-					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: "LOCAL",
+					NEXT_PUBLIC_FASTAPI_BACKEND_URL: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL,
+					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE,
 				},
 			},
 });
--- a/surfsense_web/pnpm-workspace.yaml
+++ b/surfsense_web/pnpm-workspace.yaml
@ -0,0 +1,11 @@
+allowBuilds:
+  "@parcel/watcher": true
+  "@rocicorp/zero-sqlite3": true
+  "@swc/core": true
+  core-js: true
+  esbuild: true
+  protobufjs: true
+  sharp: true
+  unrs-resolver: true
+
+minimumReleaseAge: 10080
--- a/surfsense_web/tests/README.md
+++ b/surfsense_web/tests/README.md
@ -5,29 +5,6 @@ Celery + Postgres + Redis). Designed to scale from one connector
 (Composio Drive in Phase 1) to every connector + manual file upload
 without rewriting the harness.

-## Layout
-
-```
-tests/
-├── auth.setup.ts                    # one-time login, persists localStorage
-├── smoke/                           # tracer-bullet tests (dashboard renders)
-├── connectors/
-│   └── composio/
-│       └── drive/                   # Composio Google Drive — Phase 1
-│           └── journey.spec.ts      # connect -> select -> index -> canary assertion
-├── fixtures/                        # test.extend() fixtures
-│   ├── index.ts                     # named `test` exports per spec category
-│   ├── search-space.fixture.ts      # apiToken + per-test search space
-│   └── connectors/
-│       └── composio-drive.fixture.ts
-├── helpers/                         # reusable building blocks
-│   ├── api/                         # backend HTTP helpers
-│   ├── ui/                          # page-object selectors
-│   ├── waits/                       # deterministic polling
-│   └── canary.ts                    # canary tokens + fixed Drive file ids
-└── README.md                        # this file
-```
-
 ## How the deterministic harness works

 There are **three layers of defense** against accidental real-world
@ -47,26 +24,90 @@ calls. None of them touch production code.

 ## Running locally

+The recommended flow runs only Postgres and Redis in Docker, with the backend
+and Celery worker on the host. The E2E entrypoints `setdefault` every backend
+variable they need, so no `.env` file is required on a fresh checkout.
+
+### One-time setup
+
+From `surfsense_web/`:
+
 ```bash
-# 1. Bring up Postgres + Redis (Docker compose, supabase, whatever you use)
-docker compose up -d postgres redis
-
-# 2. Backend with E2E entrypoint (note: NOT `uv run main.py`)
-cd surfsense_backend
-uv run alembic upgrade head
-uv run python tests/e2e/run_backend.py &
-
-# 3. Celery worker with the same entrypoint pattern
-uv run python tests/e2e/run_celery.py &
-
-# 4. Run Playwright tests (auto-starts `pnpm dev` via webServer config)
-cd ../surfsense_web
-pnpm test:e2e
+pnpm install
+pnpm exec playwright install --with-deps chromium
 ```

-For CI behavior in one go: `pnpm test:e2e:headless`.
+### Each run

-To debug the Drive journey: `pnpm test:e2e -- connectors/composio/drive/journey.spec.ts --headed`.
+**1. Bring up Postgres + Redis** from the repo root:
+
+```bash
+docker compose -f docker/docker-compose.deps-only.yml up -d db redis
+```
+
+**2. Start the backend** in `surfsense_backend/`, terminal A:
+
+```bash
+uv sync
+uv run alembic upgrade head
+uv run python tests/e2e/run_backend.py
+```
+
+**3. Start the Celery worker** in `surfsense_backend/`, terminal B:
+
+```bash
+uv run python tests/e2e/run_celery.py
+```
+
+**4. Register the Playwright user**:
+
+```bash
+curl -X POST http://localhost:8000/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}'
+```
+
+**5. Run Playwright** from `surfsense_web/`, terminal C:
+
+```bash
+pnpm test:e2e             # dev server (fast iteration)
+pnpm test:e2e:headed      # show the browser
+pnpm test:e2e:ui          # Playwright UI mode
+pnpm test:e2e:debug       # Playwright Inspector
+pnpm test:e2e:prod        # build + start (matches CI exactly)
+pnpm test:e2e:report      # open the last HTML report
+```
+
+`playwright.config.ts` and the backend run scripts share defaults, so the
+above works without exporting any env vars. Override
+`PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`, or
+`NEXT_PUBLIC_FASTAPI_BACKEND_URL` only when pointing tests at a different
+stack.
+
+To debug a single journey:
+
+```bash
+pnpm test:e2e:headed connectors/composio/drive/journey.spec.ts
+```
+
+### Hermetic alternative (matches CI)
+
+To reproduce the CI environment exactly (backend and Celery in containers
+with L3 egress denied), replace steps 1–3 with:
+
+```bash
+docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
+```
+
+Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear
+down with:
+
+```bash
+docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
+```
+
+This builds the ~9 GB e2e backend image, so the deps-only flow is faster for
+day-to-day work.

 ## Adding a new connector

--- a/surfsense_web/tests/auth.setup.ts
+++ b/surfsense_web/tests/auth.setup.ts
@ -1,47 +1,21 @@
 import path from "node:path";
 import { expect, test as setup } from "@playwright/test";
+import { acquireTestToken } from "./helpers/api/auth";

 /**
- * One-time authentication setup. Logs in via the FastAPI backend directly
- * (skipping the UI) and persists the resulting localStorage token so every
- * test in the chromium project starts already authenticated.
- *
- * Mirrors the real auth flow in `lib/apis/auth-api.service.ts`:
- *   POST /auth/jwt/login  ->  { access_token }
- *   localStorage.setItem("surfsense_bearer_token", access_token)
- *
- * Requires a seeded test user in the dev/test DB. Configure via env:
- *   PLAYWRIGHT_TEST_EMAIL, PLAYWRIGHT_TEST_PASSWORD
- *   NEXT_PUBLIC_FASTAPI_BACKEND_URL  (defaults to http://localhost:8000)
+ * One-time authentication setup. Acquires a bearer token for the seeded
+ * e2e user (rate-limit-free /__e2e__/auth/token first, /auth/jwt/login
+ * fallback) and persists it via localStorage so every test in the
+ * chromium project starts already authenticated.
 */

 const authFile = path.join(__dirname, "..", "playwright", ".auth", "user.json");

-const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
-const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
-const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
 const STORAGE_KEY = "surfsense_bearer_token";

 setup("authenticate", async ({ page, request }) => {
-	const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, {
-		form: {
-			username: TEST_USER_EMAIL,
-			password: TEST_USER_PASSWORD,
-			grant_type: "password",
-		},
-		headers: { "Content-Type": "application/x-www-form-urlencoded" },
-	});
-
-	expect(
-		response.ok(),
-		`Login to ${BACKEND_URL}/auth/jwt/login failed (${response.status()}). ` +
-			`Check that the backend is running and that PLAYWRIGHT_TEST_EMAIL ` +
-			`(${TEST_USER_EMAIL}) is seeded with PLAYWRIGHT_TEST_PASSWORD. ` +
-			`Body: ${await response.text()}`
-	).toBeTruthy();
-
-	const { access_token } = (await response.json()) as { access_token: string };
-	expect(access_token, "Backend response missing access_token").toBeTruthy();
+	const access_token = await acquireTestToken(request);
+	expect(access_token, "Failed to acquire e2e bearer token").toBeTruthy();

 	await page.addInitScript(
 		({ key, token }) => {
--- a/surfsense_web/tests/documents/file-upload/journey.spec.ts
+++ b/surfsense_web/tests/documents/file-upload/journey.spec.ts
@ -107,14 +107,14 @@ test.describe("Manual file upload journey", () => {
 		});
 	});

-	test("user uploads a PDF (DOCUMENT branch via real Docling)", async ({
+	test("user uploads a PDF (DOCUMENT branch)", async ({
 		page,
 		request,
 		apiToken,
 		searchSpace,
 		chatThread,
 	}) => {
-		test.setTimeout(240_000); // Docling cold-start can take 30-60s on first invocation.
+		test.setTimeout(180_000);

 		await uploadAndAssert({
 			page,
--- a/surfsense_web/tests/fixtures/search-space.fixture.ts
+++ b/surfsense_web/tests/fixtures/search-space.fixture.ts
@ -1,5 +1,7 @@
+import fs from "node:fs";
+import path from "node:path";
 import { test as base } from "@playwright/test";
-import { loginAsTestUser } from "../helpers/api/auth";
+import { acquireTestToken } from "../helpers/api/auth";
 import {
 	createSearchSpace,
 	deleteSearchSpace,
@ -20,12 +22,45 @@ export type SearchSpaceFixtures = {
 	searchSpace: SearchSpaceRow;
 };

+const STORAGE_KEY = "surfsense_bearer_token";
+
+// Reuse the token written by tests/auth.setup.ts; on cache miss we
+// mint a fresh one via /__e2e__/auth/token (rate-limit-free).
+const AUTH_STATE_PATH = path.join(__dirname, "..", "..", "playwright", ".auth", "user.json");
+
+function loadCachedBearerToken(): string | null {
+	try {
+		const raw = fs.readFileSync(AUTH_STATE_PATH, "utf8");
+		const parsed = JSON.parse(raw) as {
+			origins?: Array<{
+				origin?: string;
+				localStorage?: Array<{ name?: string; value?: string }>;
+			}>;
+		};
+		for (const origin of parsed.origins ?? []) {
+			for (const entry of origin.localStorage ?? []) {
+				if (entry.name === STORAGE_KEY && entry.value) {
+					return entry.value;
+				}
+			}
+		}
+	} catch {
+		// Fall back to a fresh login.
+	}
+	return null;
+}
+
 export const searchSpaceFixtures = base.extend<SearchSpaceFixtures, { apiTokenWorker: string }>({
 	apiTokenWorker: [
 		async ({ playwright }, use) => {
+			const cached = loadCachedBearerToken();
+			if (cached) {
+				await use(cached);
+				return;
+			}
 			const ctx = await playwright.request.newContext();
 			try {
-				const token = await loginAsTestUser(ctx);
+				const token = await acquireTestToken(ctx);
 				await use(token);
 			} finally {
 				await ctx.dispose();
--- a/surfsense_web/tests/helpers/api/auth.ts
+++ b/surfsense_web/tests/helpers/api/auth.ts
@ -11,8 +11,39 @@ import type { APIRequestContext } from "@playwright/test";

 export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";

-const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
-const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
+const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "e2e-test@surfsense.net";
+const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "E2eTestPassword123!";
+const E2E_MINT_SECRET = process.env.E2E_MINT_SECRET || "local-e2e-mint-secret-not-for-production";
+
+/**
+ * Mints a JWT for the seeded e2e user via the test-only endpoint mounted
+ * by surfsense_backend/tests/e2e/run_backend.py. Bypasses the production
+ * /auth/jwt/login rate limit (5/min/IP), so it's safe to call from any
+ * worker / retry. Throws on any non-OK response, e.g. a 404 when the endpoint
+ * isn't mounted (i.e. someone is pointing the suite at a non-e2e backend).
+ */
+export async function mintTestToken(
+	request: APIRequestContext,
+	email: string = TEST_USER_EMAIL
+): Promise<string> {
+	const response = await request.post(`${BACKEND_URL}/__e2e__/auth/token`, {
+		data: { email },
+		headers: {
+			"Content-Type": "application/json",
+			"X-E2E-Mint-Secret": E2E_MINT_SECRET,
+		},
+	});
+	if (!response.ok()) {
+		throw new Error(
+			`Mint token at ${BACKEND_URL}/__e2e__/auth/token failed (${response.status()}): ${await response.text()}`
+		);
+	}
+	const { access_token } = (await response.json()) as { access_token: string };
+	if (!access_token) {
+		throw new Error("Mint response missing access_token");
+	}
+	return access_token;
+}

 export async function loginAsTestUser(request: APIRequestContext): Promise<string> {
 	const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, {
@ -37,6 +68,23 @@ export async function loginAsTestUser(request: APIRequestContext): Promise<strin
 	return access_token;
 }

+/**
+ * Get a bearer token by trying the rate-limit-free mint endpoint first
+ * and falling back to /auth/jwt/login if the e2e endpoint isn't mounted
+ * (e.g. running against a non-e2e backend in local dev).
+ */
+export async function acquireTestToken(request: APIRequestContext): Promise<string> {
+	try {
+		return await mintTestToken(request);
+	} catch (err) {
+		const msg = err instanceof Error ? err.message : String(err);
+		if (msg.includes("(404)") || msg.includes("(405)")) {
+			return loginAsTestUser(request);
+		}
+		throw err;
+	}
+}
+
 /**
 * Standard auth headers for backend API calls. Optionally injects an
 * X-E2E-Scenario header that the test-only ScenarioMiddleware in