Merge pull request #1377 from AnishSarkar22/feat/e2e-testing-ci

feat: add E2E CI and harden Docker build migrations
2026-05-17 18:35:19 +02:00 · 2026-05-15 04:47:26 -07:00 · 2026-05-15 04:47:26 -07:00 · 4db3cf7fd5
commit 4db3cf7fd5
parent e8aad48ddf 883c72396c
45 changed files with 1733 additions and 495 deletions
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@ -31,7 +31,7 @@ jobs:
      new_tag: ${{ steps.tag_version.outputs.next_version }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.branch }}
@ -108,16 +108,18 @@ jobs:
            name: surfsense-backend
            context: ./surfsense_backend
            file: ./surfsense_backend/Dockerfile
            target: production
          - image: web
            name: surfsense-web
            context: ./surfsense_web
            file: ./surfsense_web/Dockerfile
            target: runner
    env:
      REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set lowercase image name
        id: image
@ -125,19 +127,19 @@ jobs:
      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}
      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
      - name: Free up disk space
        run: |
@ -149,10 +151,11 @@ jobs:
      - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }})
        id: build
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v7
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.file }}
          target: ${{ matrix.target }}
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.image.outputs.name }}
          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
@ -174,7 +177,7 @@ jobs:
          touch "/tmp/digests/${digest#sha256:}"
      - name: Upload digest
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: digests-${{ matrix.image }}-${{ matrix.suffix }}
          path: /tmp/digests/*
@ -205,22 +208,22 @@ jobs:
        run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT
      - name: Download amd64 digest
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: digests-${{ matrix.image }}-amd64
          path: /tmp/digests
      - name: Download arm64 digest
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: digests-${{ matrix.image }}-arm64
          path: /tmp/digests
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
@ -239,7 +242,7 @@ jobs:
      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
        with:
          images: ${{ steps.image.outputs.name }}
          tags: |
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@ -0,0 +1,174 @@
 name: E2E Tests
 on:
  pull_request:
    branches: [main, dev]
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
      - 'surfsense_web/**'
      - 'surfsense_backend/**'
      - 'docker/docker-compose.e2e.yml'
      - '.github/workflows/e2e-tests.yml'
  workflow_dispatch:
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  e2e:
    name: Journey
    runs-on: ubuntu-latest
    if: github.event.pull_request.draft == false
    timeout-minutes: 30
    env:
      # Test user that the backend creates via /auth/register before Playwright runs.
      PLAYWRIGHT_TEST_EMAIL: e2e-test@surfsense.net
      PLAYWRIGHT_TEST_PASSWORD: E2eTestPassword123!
      # Frontend env: Playwright's webServer (surfsense_web/playwright.config.ts)
      # spawns `pnpm build && pnpm start` in CI; these get baked into the build.
      NEXT_PUBLIC_FASTAPI_BACKEND_URL: http://localhost:8000
      NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: LOCAL
      # Shared secret for the test-only POST /__e2e__/auth/token endpoint.
      # Must match docker-compose.e2e.yml's backend env (x-backend-env).
      E2E_MINT_SECRET: e2e-mint-secret-not-for-production
    steps:
      - uses: actions/checkout@v6
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4
      # ─── Backend stack ─────────────────────────────────────────────────
      # Builds the e2e image (multi-stage, deps cached via GHA), brings up
      # db + redis + backend + celery_worker, blocks until every healthcheck
      # is green. No `uv` invocation on the runner; no PID files; no curl
      # polling loops; readiness is gated by Docker healthchecks.
      - name: Build & start backend stack
        run: |
          docker compose -f docker/docker-compose.e2e.yml \
            up -d --build --wait --wait-timeout 300
      - name: Show backend stack status
        if: always()
        run: docker compose -f docker/docker-compose.e2e.yml ps
      - name: Register E2E test user
        run: |
          # 200/201 = created, 400 = already exists (idempotent across reruns).
          STATUS=$(curl -s -o /tmp/register.json -w "%{http_code}" \
            -X POST http://localhost:8000/auth/register \
            -H "Content-Type: application/json" \
            -d "{\"email\":\"${PLAYWRIGHT_TEST_EMAIL}\",\"password\":\"${PLAYWRIGHT_TEST_PASSWORD}\"}")
          echo "Register status: ${STATUS}"
          cat /tmp/register.json
          if [ "${STATUS}" != "200" ] && [ "${STATUS}" != "201" ] && [ "${STATUS}" != "400" ]; then
            echo "::error::Failed to register test user (status ${STATUS})"
            exit 1
          fi
          # Flush auth rate-limit counters so Playwright starts clean.
          docker compose -f docker/docker-compose.e2e.yml exec -T redis \
            sh -c "redis-cli --scan --pattern 'surfsense:auth_rate_limit:*' \
              | xargs -r redis-cli DEL" || true
      # ─── Frontend (host-side) ──────────────────────────────────────────
      # Playwright's webServer block in playwright.config.ts spawns
      # `pnpm build && pnpm start` in CI mode and waits for :3000.
      - uses: actions/setup-node@v6
        with:
          node-version: '20'
      - uses: pnpm/action-setup@v6
      - name: Get pnpm store directory
        id: pnpm-cache
        shell: bash
        # Publish pnpm's store location as the STORE_PATH step output so a later
        # cache step can read it via steps.pnpm-cache.outputs.STORE_PATH.
        # "$GITHUB_OUTPUT" is quoted so the redirect target is not subject to
        # word splitting or globbing.
        run: echo "STORE_PATH=$(pnpm store path --silent)" >> "$GITHUB_OUTPUT"
      - name: Cache pnpm store
        uses: actions/cache@v5
        with:
          path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
          key: pnpm-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}
          restore-keys: pnpm-${{ runner.os }}-
      - name: Install web dependencies
        working-directory: surfsense_web
        run: pnpm install --frozen-lockfile
      - name: Cache Playwright browsers
        id: playwright-cache
        uses: actions/cache@v5
        with:
          path: ~/.cache/ms-playwright
          key: playwright-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}
      - name: Install Playwright browsers
        if: steps.playwright-cache.outputs.cache-hit != 'true'
        working-directory: surfsense_web
        run: pnpm exec playwright install --with-deps chromium
      - name: Install Playwright system deps (cache hit)
        if: steps.playwright-cache.outputs.cache-hit == 'true'
        working-directory: surfsense_web
        run: pnpm exec playwright install-deps chromium
      - name: Cache Next.js build
        uses: actions/cache@v5
        with:
          path: surfsense_web/.next/cache
          key: nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-${{ github.sha }}
          restore-keys: |
            nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-
            nextjs-${{ runner.os }}-
      # ─── Tests ─────────────────────────────────────────────────────────
      - name: Run Playwright tests
        working-directory: surfsense_web
        run: pnpm test:e2e:prod
      # ─── Failure diagnostics ───────────────────────────────────────────
      - name: Dump backend stack logs on failure
        if: ${{ failure() || cancelled() }}
        run: |
          mkdir -p ./compose-logs
          docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps \
            > ./compose-logs/all-services.log 2>&1 || true
          for svc in db redis backend celery_worker; do
            docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps "$svc" \
              > "./compose-logs/${svc}.log" 2>&1 || true
          done
          docker compose -f docker/docker-compose.e2e.yml ps \
            > ./compose-logs/ps.txt 2>&1 || true
      # ─── Artifacts ─────────────────────────────────────────────────────
      - name: Upload Playwright HTML report
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: playwright-report
          path: surfsense_web/playwright-report/
          retention-days: 14
      - name: Upload Playwright traces
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: playwright-traces
          path: surfsense_web/test-results/
          retention-days: 14
      - name: Upload backend stack logs
        if: ${{ failure() || cancelled() }}
        uses: actions/upload-artifact@v7
        with:
          name: backend-stack-logs
          path: ./compose-logs/
          retention-days: 7
      # ─── Teardown ──────────────────────────────────────────────────────
      - name: Tear down backend stack
        if: always()
        run: docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
--- a/docker/.env.example
+++ b/docker/.env.example
@ -4,7 +4,7 @@
 # Database, Redis, and internal service wiring are handled automatically.
 # ==============================================================================
-# SurfSense version (use "latest", a clean version like "0.0.14", or a specific build like "0.0.14.1")
+# SurfSense version (use "latest" or a specific version like "0.0.14")
 SURFSENSE_VERSION=latest
 # ------------------------------------------------------------------------------
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@ -10,6 +10,11 @@
 name: surfsense-dev
 x-backend-build: &backend-build
  context: ../surfsense_backend
  args:
    EMBEDDING_MODEL: ${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
 services:
  db:
    image: pgvector/pgvector:pg17
@ -69,7 +74,7 @@ services:
      retries: 5
  backend:
-    build: ../surfsense_backend
+    build: *backend-build
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
@ -114,7 +119,7 @@ services:
      start_period: 200s
  celery_worker:
-    build: ../surfsense_backend
+    build: *backend-build
    volumes:
      - ../surfsense_backend/app:/app/app
      - shared_temp:/shared_tmp
@ -140,7 +145,7 @@ services:
        condition: service_healthy
  celery_beat:
-    build: ../surfsense_backend
+    build: *backend-build
    env_file:
      - ../surfsense_backend/.env
    environment:
@ -159,7 +164,7 @@ services:
        condition: service_started
  # flower:
-  #   build: ../surfsense_backend
+  #   build: *backend-build
  #   ports:
  #     - "${FLOWER_PORT:-5555}:5555"
  #   env_file:
--- a/docker/docker-compose.e2e.yml
+++ b/docker/docker-compose.e2e.yml
@ -0,0 +1,181 @@
 # =============================================================================
 # SurfSense — E2E Docker Compose stack
 # =============================================================================
 # Hermetic backend stack for Playwright E2E tests:
 #   - db / redis on an internal-only network (no internet egress)
 #   - backend (FastAPI) joins the internal network AND a separate ingress
 #     bridge so the host runner can reach :8000
 #   - celery_worker on the internal network only — zero egress surface
 #
 # The backend image is built from surfsense_backend/Dockerfile target=e2e,
 # which adds tests/ via the `tests-source` additional context (tests/ is
 # excluded from the main context by .dockerignore so production never ships
 # test fakes). See surfsense_backend/Dockerfile for stage layout.
 #
 # Usage from repo root:
 #   docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
 #   curl -X POST http://localhost:8000/auth/register ...
 #   ( run Playwright on host, pointing at localhost:8000 + localhost:3000 )
 #   docker compose -f docker/docker-compose.e2e.yml down -v
 # =============================================================================
 name: surfsense-e2e
 x-backend-env: &backend-env
  DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/surfsense_e2e
  CELERY_BROKER_URL: redis://redis:6379/0
  CELERY_RESULT_BACKEND: redis://redis:6379/0
  REDIS_APP_URL: redis://redis:6379/0
  CELERY_TASK_DEFAULT_QUEUE: surfsense
  SECRET_KEY: ci-test-secret-key-not-for-production
  AUTH_TYPE: LOCAL
  REGISTRATION_ENABLED: "TRUE"
  ETL_SERVICE: DOCLING
  EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
  NEXT_FRONTEND_URL: http://host.docker.internal:3000
  # Sentinel keys — the fakes never read them; any real call that leaks through is rejected with a 401.
  COMPOSIO_API_KEY: e2e-deny-real-call-sentinel
  COMPOSIO_ENABLED: "TRUE"
  OPENAI_API_KEY: e2e-deny-real-call-sentinel
  ANTHROPIC_API_KEY: e2e-deny-real-call-sentinel
  LITELLM_API_KEY: e2e-deny-real-call-sentinel
  MICROSOFT_CLIENT_ID: fake-microsoft-client-id
  MICROSOFT_CLIENT_SECRET: fake-microsoft-client-secret
  ONEDRIVE_REDIRECT_URI: http://localhost:8000/api/v1/auth/onedrive/connector/callback
  DROPBOX_APP_KEY: fake-dropbox-app-key
  DROPBOX_APP_SECRET: fake-dropbox-app-secret
  DROPBOX_REDIRECT_URI: http://localhost:8000/api/v1/auth/dropbox/connector/callback
  # Defense-in-depth: even though L3 egress is denied for the worker via
  # `internal: true`, the backend still has a route via `ingress`. Setting
  # HTTPS_PROXY to an unreachable port turns any leaked Python outbound HTTP
  # call into a fast Connection refused. UNLIKE the old runner-shell setup,
  # this proxy is set on the container env and `uv` is never invoked here,
  # so there is no interaction with uv's implicit-sync behaviour.
  HTTPS_PROXY: http://127.0.0.1:1
  HTTP_PROXY: http://127.0.0.1:1
  NO_PROXY: localhost,127.0.0.1,0.0.0.0,db,redis,host.docker.internal
  HF_HUB_OFFLINE: "1"
  TRANSFORMERS_OFFLINE: "1"
  # Test-only token-mint endpoint secret (see tests/e2e/run_backend.py).
  E2E_MINT_SECRET: e2e-mint-secret-not-for-production
 services:
  db:
    image: pgvector/pgvector:pg17
    command: >
      postgres
        -c wal_level=logical
        -c max_wal_senders=10
        -c max_replication_slots=10
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: surfsense_e2e
    # Ephemeral storage — every CI run gets a clean DB, no volume cleanup needed.
    tmpfs:
      - /var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d surfsense_e2e"]
      interval: 2s
      timeout: 3s
      retries: 30
    networks: [internal]
  redis:
    image: redis:8-alpine
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 2s
      timeout: 3s
      retries: 30
    networks: [internal]
  backend:
    build:
      context: ../surfsense_backend
      dockerfile: Dockerfile
      target: e2e
      additional_contexts:
        # tests/ is excluded from the main context by .dockerignore;
        # the e2e stage's `COPY --from=tests-source` pulls it in here.
        tests-source: ../surfsense_backend/tests
      args:
        EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
      cache_from:
        - type=gha,scope=surfsense-e2e-backend
      cache_to:
        - type=gha,mode=max,scope=surfsense-e2e-backend
    image: surfsense-e2e-backend:local
    environment:
      <<: *backend-env
      SERVICE_ROLE: api
    volumes:
      - shared_temp:/shared_tmp
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
      - "8000:8000"
    depends_on:
      db: { condition: service_healthy }
      redis: { condition: service_healthy }
    healthcheck:
      # Use Python (already in the image) instead of curl/wget to avoid
      # depending on either tool being installed in the runtime layers.
      test:
        - CMD
        - python
        - -c
        - |
          import sys, urllib.request
          try:
              r = urllib.request.urlopen("http://localhost:8000/openapi.json", timeout=2)
              sys.exit(0 if r.status == 200 else 1)
          except Exception:
              sys.exit(1)
      interval: 3s
      timeout: 5s
      retries: 60
      start_period: 30s
    networks:
      - internal      # to reach db/redis
      - ingress       # so host can reach :8000
  celery_worker:
    image: surfsense-e2e-backend:local
    pull_policy: never
    # No build: section — reuses the image built by the `backend` service.
    # Only `backend` declares `build:`, so the shared `image:` tag is built
    # once; `pull_policy: never` keeps Compose from trying to pull it.
    environment:
      <<: *backend-env
      SERVICE_ROLE: worker
    volumes:
      - shared_temp:/shared_tmp
    depends_on:
      backend: { condition: service_healthy }
    healthcheck:
      test:
        - CMD-SHELL
        - "celery -A app.celery_app inspect ping --timeout 2 | grep -q pong"
      interval: 5s
      timeout: 5s
      retries: 12
      start_period: 20s
    networks: [internal]
 networks:
  # Internal network: containers attached only to this network have NO route
  # to the host or the internet. This is the L3 deny-egress mechanism that
  # replaces the fragile HTTPS_PROXY-on-the-runner approach.
  internal:
    driver: bridge
    internal: true
  # Regular bridge network. Only the `backend` service joins it, solely so
  # the host can reach :8000 via the published port. celery_worker / db /
  # redis stay off this network entirely.
  ingress:
    driver: bridge
 volumes:
  shared_temp:
--- a/package.json
+++ b/package.json
@ -1,5 +1,5 @@
 {
  "name": "surfsense",
  "private": true,
-  "packageManager": "pnpm@10.24.0"
+  "packageManager": "pnpm@10.26.0"
 }
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -13,5 +13,5 @@ celerybeat-schedule*
 celerybeat-schedule.*
 celerybeat-schedule.dir
 celerybeat-schedule.bak
-global_llm_config.yaml
+/app/config/global_llm_config.yaml
 app/templates/_generated/
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -1,8 +1,16 @@
-FROM python:3.12-slim
+# =============================================================================
 # SurfSense Backend — Multi-stage Dockerfile
 # =============================================================================
 # Graph: base → deps → models → {e2e, production}
 #   e2e        — tests/ via additional_contexts (docker-compose.e2e.yml)
 #   production — published ghcr.io image (docker-build.yml pins target)
 # =============================================================================
 # ─── Stage 1: base (system deps, Pandoc, certificates) ──────────────────────
 FROM python:3.12-slim AS base
 WORKDIR /app
 # Install system dependencies including SSL tools, CUDA dependencies, and Tesseract OCR
 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    python3-dev \
@ -11,6 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    unzip \
    gnupg2 \
    ffmpeg \
    espeak-ng \
    libsndfile1 \
    libgl1 \
@ -22,21 +31,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    && rm -rf /var/lib/apt/lists/*
-# Install Pandoc 3.x from GitHub as a fallback for Linux where pypandoc_binary
+RUN which ffmpeg && ffmpeg -version
-# may not bundle pandoc (apt ships 2.17 which has broken table rendering).
+
-# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks this up.
+# Pandoc 3.x from GitHub Releases — apt ships 2.17 which has broken table rendering.
 # pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks up this binary.
 RUN ARCH=$(dpkg --print-architecture) && \
    wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
    dpkg -i /tmp/pandoc.deb && \
    rm /tmp/pandoc.deb
 # Update certificates and install SSL tools
 RUN update-ca-certificates
 RUN pip install --upgrade certifi pip-system-certs
-# Copy requirements
+ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
-COPY pyproject.toml .
+ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
-COPY uv.lock .
+ENV SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD=FALSE
 # ─── Stage 2: deps (Python deps frozen from uv.lock) ────────────────────────
 FROM base AS deps
 COPY pyproject.toml uv.lock ./
 # Install all Python dependencies from uv.lock for deterministic builds.
 #
@ -49,9 +64,7 @@ COPY uv.lock .
 # Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
 # PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
 # nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
-# captured in uv.lock). Installing from cu121 first only wasted ~2GB of
+# captured in uv.lock). If a specific CUDA version is needed, wire it through
 # downloads that the lock-based install immediately replaced. If a specific
 # CUDA version is needed (driver compatibility, etc.), wire it through
 # [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
 RUN pip install --no-cache-dir uv && \
    uv export --frozen --no-dev --no-hashes --no-emit-project \
@ -59,49 +72,42 @@ RUN pip install --no-cache-dir uv && \
    uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt
-# Set SSL environment variables dynamically
+
-RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
+# ─── Stage 3: models (pre-baked offline assets) ─────────────────────────────
-    echo "Setting SSL_CERT_FILE to $CERTIFI_PATH" && \
+FROM deps AS models
    echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /root/.bashrc && \
    echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /root/.bashrc
 ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
 ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
 # Pre-download EasyOCR models to avoid runtime SSL issues
-RUN mkdir -p /root/.EasyOCR/model
+RUN mkdir -p /root/.EasyOCR/model && \
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip      -O /root/.EasyOCR/model/english_g2.zip      || true && \
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true && \
-RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
+    cd /root/.EasyOCR/model && \
    (unzip -o english_g2.zip || true) && \
    (unzip -o craft_mlt_25k.zip || true)
 # Pre-download Docling models
-RUN python -c "try:\n    from docling.document_converter import DocumentConverter\n    conv = DocumentConverter()\nexcept:\n    pass" || true
+RUN printf '%s\n' \
    'try:' \
    '    from docling.document_converter import DocumentConverter' \
    '    DocumentConverter()' \
    'except Exception:' \
    '    pass' \
    | python || true
-# Install Playwright browsers for web scraping (the playwright package itself
+ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-# is already installed via uv.lock above)
+RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')"
 # Install Playwright browsers (the playwright python package itself is in deps)
 RUN playwright install chromium --with-deps
 # Copy source code
 COPY . .
 # Install the project itself in editable mode. Dependencies were already
 # installed deterministically from uv.lock above, so --no-deps prevents any
 # re-resolution that could pull newer versions.
 RUN uv pip install --system --no-cache-dir --no-deps -e .
 # Copy and set permissions for entrypoint script
 # Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
 COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
 RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
 # Shared temp directory for file uploads between API and Worker containers.
 # Python's tempfile module uses TMPDIR, so uploaded files land here.
 # Mount the SAME volume at /shared_tmp on both API and Worker in Coolify.
 RUN mkdir -p /shared_tmp
 ENV TMPDIR=/shared_tmp
 # Prevent uvloop compatibility issues
 ENV PYTHONPATH=/app
 ENV UVICORN_LOOP=asyncio
 ENV TMPDIR=/shared_tmp
 ENV PYTHONUNBUFFERED=1
 # Tune glibc malloc to return freed memory to the OS more aggressively.
 # Without these, Python's gc.collect() frees objects but the underlying
@ -110,6 +116,56 @@ ENV MALLOC_MMAP_THRESHOLD_=65536
 ENV MALLOC_TRIM_THRESHOLD_=131072
 ENV MALLOC_MMAP_MAX_=65536
 # ─── Stage 4: e2e (production source + tests/ + e2e entrypoint) ─────────────
 # Built via `docker buildx build --target e2e`. The default build target is
 # `production` (the last stage), so this stage is opt-in for CI only.
 #
 # `tests/` is excluded from the main build context by .dockerignore (so prod
 # can never accidentally ship test fakes). The e2e stage receives tests/
 # through an "additional context" passed by docker-compose.e2e.yml — see
 # https://docs.docker.com/reference/compose-file/build/#additional_contexts
 FROM models AS e2e
 # Same source copy as production. .dockerignore filters out tests/.
 COPY . .
 # Bring tests/ in via the named additional build context. CI passes
 #   --build-context tests-source=./tests
 # (or the equivalent additional_contexts entry in docker-compose.e2e.yml).
 COPY --from=tests-source . ./tests/
 # Install the project itself in editable mode. Dependencies were already
 # installed deterministically from uv.lock above, so --no-deps prevents any
 # re-resolution that could pull newer versions.
 RUN uv pip install --system --no-cache-dir --no-deps -e .
 COPY scripts/docker/entrypoint.e2e.sh /app/scripts/docker/entrypoint.e2e.sh
 RUN dos2unix /app/scripts/docker/entrypoint.e2e.sh && chmod +x /app/scripts/docker/entrypoint.e2e.sh
 # SERVICE_ROLE is overridden per service in docker-compose.e2e.yml (api / worker).
 ENV SERVICE_ROLE=api
 EXPOSE 8000-8001
 CMD ["/app/scripts/docker/entrypoint.e2e.sh"]
 # ─── Stage 5: production (published ghcr.io image) ──────────────────────────
 # CI pins `target: production`; also the default for `docker build` / dev compose.
 FROM models AS production
 # Copy source code (tests/ excluded by .dockerignore — production never ships tests).
 COPY . .
 # Install the project itself in editable mode. Dependencies were already
 # installed deterministically from uv.lock above, so --no-deps prevents any
 # re-resolution that could pull newer versions.
 RUN uv pip install --system --no-cache-dir --no-deps -e .
 # Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
 COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
 RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
 # SERVICE_ROLE controls which process this container runs:
 #   api     – FastAPI backend only (runs migrations on startup)
 #   worker  – Celery worker only
@ -127,6 +183,5 @@ ENV CELERY_MAX_TASKS_PER_CHILD=50
 #   ""                       – both queues (default, for single-worker setups)
 ENV CELERY_QUEUES=""
 # Run
 EXPOSE 8000-8001
-CMD ["/app/scripts/docker/entrypoint.sh"]
+CMD ["/app/scripts/docker/entrypoint.sh"]
--- a/surfsense_backend/alembic/env.py
+++ b/surfsense_backend/alembic/env.py
@ -67,7 +67,11 @@ def run_migrations_offline() -> None:
 def do_run_migrations(connection: Connection) -> None:
-    context.configure(connection=connection, target_metadata=target_metadata)
+    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        transaction_per_migration=True,
    )
    with context.begin_transaction():
        context.run_migrations()
--- a/surfsense_backend/alembic/versions/130_add_agent_action_log.py
+++ b/surfsense_backend/alembic/versions/130_add_agent_action_log.py
@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
    bind = op.get_bind()
    if sa.inspect(bind).has_table("agent_action_log"):
        return
    op.create_table(
        "agent_action_log",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/131_add_document_revisions.py
+++ b/surfsense_backend/alembic/versions/131_add_document_revisions.py
@ -29,6 +29,21 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
    """Create the document/folder revision tables that are not present yet.

    Each table is looked up through the inspector immediately before its
    creation helper would run; a table that already exists is left untouched,
    so nothing is created when both are already there.
    """
    inspector = sa.inspect(op.get_bind())
    # Order matters: document_revisions is handled before folder_revisions.
    pending = (
        ("document_revisions", _create_document_revisions),
        ("folder_revisions", _create_folder_revisions),
    )
    for table_name, create_table in pending:
        if not inspector.has_table(table_name):
            create_table()
 def _create_document_revisions() -> None:
    op.create_table(
        "document_revisions",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
@ -74,6 +89,8 @@ def upgrade() -> None:
        ),
    )
 def _create_folder_revisions() -> None:
    op.create_table(
        "folder_revisions",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
+++ b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py
@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
    bind = op.get_bind()
    if sa.inspect(bind).has_table("agent_permission_rules"):
        return
    op.create_table(
        "agent_permission_rules",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
--- a/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py
+++ b/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py
@ -50,29 +50,39 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
-    op.add_column(
+    bind = op.get_bind()
-        "agent_action_log",
+    inspector = sa.inspect(bind)
-        sa.Column("tool_call_id", sa.String(length=64), nullable=True),
+    columns = {c["name"] for c in inspector.get_columns("agent_action_log")}
-    )
+    indexes = {i["name"] for i in inspector.get_indexes("agent_action_log")}
    op.add_column(
        "agent_action_log",
        sa.Column("chat_turn_id", sa.String(length=64), nullable=True),
    )
-    op.create_index(
+    if "tool_call_id" not in columns:
-        "ix_agent_action_log_tool_call_id",
+        op.add_column(
-        "agent_action_log",
+            "agent_action_log",
-        ["tool_call_id"],
+            sa.Column("tool_call_id", sa.String(length=64), nullable=True),
-    )
+        )
-    op.create_index(
+    if "chat_turn_id" not in columns:
-        "ix_agent_action_log_chat_turn_id",
+        op.add_column(
-        "agent_action_log",
+            "agent_action_log",
-        ["chat_turn_id"],
+            sa.Column("chat_turn_id", sa.String(length=64), nullable=True),
-    )
+        )
-    op.execute(
+    if "ix_agent_action_log_tool_call_id" not in indexes:
-        "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL"
+        op.create_index(
-    )
+            "ix_agent_action_log_tool_call_id",
            "agent_action_log",
            ["tool_call_id"],
        )
    if "ix_agent_action_log_chat_turn_id" not in indexes:
        op.create_index(
            "ix_agent_action_log_chat_turn_id",
            "agent_action_log",
            ["chat_turn_id"],
        )
    if "turn_id" in columns:
        op.execute(
            "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL"
        )
 def downgrade() -> None:
--- a/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py
+++ b/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py
@ -36,15 +36,22 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
-    op.add_column(
+    bind = op.get_bind()
-        "new_chat_messages",
+    inspector = sa.inspect(bind)
-        sa.Column("turn_id", sa.String(length=64), nullable=True),
+    columns = {c["name"] for c in inspector.get_columns("new_chat_messages")}
-    )
+    indexes = {i["name"] for i in inspector.get_indexes("new_chat_messages")}
-    op.create_index(
+
-        "ix_new_chat_messages_turn_id",
+    if "turn_id" not in columns:
-        "new_chat_messages",
+        op.add_column(
-        ["turn_id"],
+            "new_chat_messages",
-    )
+            sa.Column("turn_id", sa.String(length=64), nullable=True),
        )
    if "ix_new_chat_messages_turn_id" not in indexes:
        op.create_index(
            "ix_new_chat_messages_turn_id",
            "new_chat_messages",
            ["turn_id"],
        )
 def downgrade() -> None:
--- a/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py
+++ b/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py
@ -27,6 +27,8 @@ from __future__ import annotations
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 revision: str = "137"
@ -39,6 +41,11 @@ _INDEX_NAME = "ux_agent_action_log_reverse_of"
 def upgrade() -> None:
    bind = op.get_bind()
    indexes = {i["name"] for i in sa.inspect(bind).get_indexes("agent_action_log")}
    if _INDEX_NAME in indexes:
        return
    # Defensively de-dup any pre-existing double-revert rows before
    # adding the unique index. Keeps the OLDEST row (smallest id) and
    # NULLs out the duplicates' ``reverse_of`` so they survive as audit
--- a/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
+++ b/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py
@ -53,6 +53,11 @@ TABLE_NAME = "new_chat_messages"
 def upgrade() -> None:
    bind = op.get_bind()
    indexes = {i["name"] for i in sa.inspect(bind).get_indexes(TABLE_NAME)}
    if INDEX_NAME in indexes:
        return
    op.create_index(
        INDEX_NAME,
        TABLE_NAME,
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@ -473,10 +473,16 @@ def initialize_vision_llm_router():
 class Config:
    # Check if ffmpeg is installed
    if not is_ffmpeg_installed():
-        import static_ffmpeg
+        allow_static_ffmpeg = (
            os.getenv("SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD", "TRUE").upper()
            == "TRUE"
        )
        if allow_static_ffmpeg:
            import static_ffmpeg
            # ffmpeg installed on first call to add_paths(), threadsafe.
            static_ffmpeg.add_paths()
        # ffmpeg installed on first call to add_paths(), threadsafe.
        static_ffmpeg.add_paths()
        # check if ffmpeg is installed again
        if not is_ffmpeg_installed():
            raise ValueError(
--- a/surfsense_backend/scripts/docker/entrypoint.e2e.sh
+++ b/surfsense_backend/scripts/docker/entrypoint.e2e.sh
@ -0,0 +1,53 @@
 #!/bin/bash
 # =============================================================================
 # E2E entrypoint for the multi-stage Dockerfile's `e2e` target.
 #
 # Dispatches on SERVICE_ROLE to the test-only entrypoints under tests/e2e/.
 # Those scripts apply sys.modules hijacks and LLM/embedding patches BEFORE
 # importing production app code (see tests/e2e/run_backend.py for rationale).
 #
 # Production never sees this file: tests/ is excluded from the production
 # stage, and the production stage uses scripts/docker/entrypoint.sh.
 # =============================================================================
 # Abort on any failed command, unset variable, or failed pipeline stage.
 set -o errexit -o nounset -o pipefail
 # The role falls back to "api" when SERVICE_ROLE is unset or empty.
 : "${SERVICE_ROLE:=api}"
 printf '%s\n' "[e2e-entrypoint] starting role=${SERVICE_ROLE}"
 wait_for_db() {
    # Block until the database accepts a connection and answers `SELECT 1`.
    #
    # Returns 0 as soon as one probe succeeds, 1 after 60 failed probes.
    # We don't loop forever — Compose depends_on/healthchecks already gate on
    # db readiness, this is just belt-and-suspenders so a slow first
    # connection doesn't race migrations.
    #
    # The probe must open a real connection: importing the engine and calling
    # dispose() alone never touches the server, so it would report success
    # even while the database is down.
    local attempt
    for attempt in {1..60}; do
        echo "[e2e-entrypoint] db check attempt ${attempt}/60"
        # The leading `if True:` lets the embedded Python stay indented.
        if python -c "if True:
            import asyncio

            from sqlalchemy import text

            from app.db import engine

            async def _probe():
                try:
                    async with engine.connect() as conn:
                        await conn.execute(text('SELECT 1'))
                finally:
                    await engine.dispose()

            asyncio.run(_probe())
        "; then
            echo "[e2e-entrypoint] db reachable after ${attempt} attempts"
            return 0
        fi
        sleep 1
    done
    echo "[e2e-entrypoint] ERROR: db not reachable after 60s" >&2
    return 1
 }
 # Role runners. Each ends in `exec`, so SIGTERM from `docker stop` reaches
 # Python directly, without a shell wrapper interposing.
 run_api_role() {
    wait_for_db
    echo "[e2e-entrypoint] running alembic upgrade head"
    alembic upgrade head
    exec python tests/e2e/run_backend.py
 }
 run_worker_role() {
    # No migrations here — the api role applies them. The db wait stays so
    # the worker does not start against an unready Postgres on cold start.
    wait_for_db
    exec python tests/e2e/run_celery.py
 }
 case "${SERVICE_ROLE}" in
    api)
        run_api_role
        ;;
    worker)
        run_worker_role
        ;;
    *)
        echo "[e2e-entrypoint] ERROR: unknown SERVICE_ROLE='${SERVICE_ROLE}' (expected: api | worker)" >&2
        exit 1
        ;;
 esac
--- a/surfsense_backend/tests/e2e/README.md
+++ b/surfsense_backend/tests/e2e/README.md
@ -1,48 +1,48 @@
-# Backend E2E Test Harness
+# Backend E2E Harness
-Strict fakes + alternative entrypoints used **only** by Playwright E2E.
+This directory contains the test-only backend entrypoints and fakes used by
-Excluded from the production Docker image via `.dockerignore`.
+Playwright. They are not part of the production image: `.dockerignore` excludes
 `tests/`, and the E2E Docker stage copies this directory through a separate
 build context.
 ## Files
-| Path                             | Role                                                                            |
+| Path | Purpose |
-| -------------------------------- | ------------------------------------------------------------------------------- |
+| --- | --- |
-| `run_backend.py`                 | FastAPI entrypoint that hijacks `sys.modules` before importing `app.app:app`    |
+| `run_backend.py` | Starts FastAPI after installing the test fakes into `sys.modules`. |
-| `run_celery.py`                  | Celery worker entrypoint with the same hijack + patch logic                     |
+| `run_celery.py` | Starts the Celery worker with the same fake setup. |
-| `middleware/scenario.py`         | `X-E2E-Scenario` header → ContextVar (read by fakes)                            |
+| `middleware/scenario.py` | Reads `X-E2E-Scenario` into a request-scoped context var. |
-| `fakes/composio_module.py`       | Strict drop-in for the `composio` package; raises on unknown surface            |
+| `fakes/composio_module.py` | Fake `composio` package used by connector flows. |
-| `fakes/llm.py`                   | `fake_get_user_long_context_llm` returning a `FakeListChatModel`                |
+| `fakes/llm.py` | Fake chat model factory. |
-| `fakes/embeddings.py`            | Deterministic 0.1-vector `embed_text` / `embed_texts`                           |
+| `fakes/embeddings.py` | Deterministic embedding helpers. |
-| `fakes/fixtures/drive_files.json`| Canned Drive listings + file contents (incl. canary tokens)                     |
+| `fakes/fixtures/drive_files.json` | Drive fixture data and canary file contents. |
-## Why a sys.modules hijack?
+## Why the import hook exists
-Production code does `from composio import Composio` at module load
+Some production modules import SDK clients at module load time, for example
-time. By the time the FastAPI app object exists, that binding has
+`from composio import Composio`. By the time `app.app` has been imported, those
-already been resolved. The hijack runs **before** any `app.*` import,
+bindings are already fixed.
 so the binding resolves to our strict fake. No production source
 changes; fakes are physically excluded from production images.
-Belt + suspenders + no internet: the strict `__getattr__` in every
+The E2E entrypoints install fake modules in `sys.modules` before importing any
-fake raises `NotImplementedError` if a future production code path
+`app.*` module. That lets the normal production code run while SDK calls resolve
-introduces a new SDK call. CI also sets `HTTPS_PROXY=http://127.0.0.1:1`
+to local fakes.
 plus sentinel API keys so any leaked outbound HTTP fails immediately.
-## Adding a new fake
+The fakes should fail loudly. If production starts using a new SDK method that
 the fake does not implement, add that method to the fake instead of letting the
 test call the real service.
-1. Create `fakes/<sdk>_module.py` modelled on `composio_module.py`.
+## Adding a fake
-2. In `run_backend.py` and `run_celery.py`, register
+
-   `sys.modules["<sdk>"] = _fake_<sdk>` before the `from app.app import app`
+1. Add `fakes/<sdk>_module.py`.
-   line.
+2. Register it in both `run_backend.py` and `run_celery.py` before importing
-3. If the new fake needs scenario branching, read from
+   `app.app` or `app.celery_app`.
 3. If the fake needs per-test behavior, read the current scenario from
   `tests.e2e.middleware.scenario.current_scenario()`.
-## Reused by backend integration tests
+## Shared with backend integration tests
-The strict fakes are not only for Playwright. Backend route integration
+Backend integration tests can use the same fakes when they need production route
-tests can import the same fake before importing `app.app`, so Composio
+code without the real SDK:
 route tests exercise production route code without touching the real
 SDK:
 ```python
 from tests.e2e.fakes import composio_module as _fake_composio
@ -50,20 +50,93 @@ sys.modules["composio"] = _fake_composio
 from app.app import app
 ```
-See `surfsense_backend/tests/integration/composio/conftest.py` for the
+See `surfsense_backend/tests/integration/composio/conftest.py` for the current
-current pattern.
+pattern.
 ## Running locally
 The recommended local flow runs only Postgres and Redis in Docker, and the
 backend + Celery worker on the host. No `.env` file is required: both
 entrypoints `setdefault` every variable they need (DB URL, Redis URL,
 sentinel API keys, etc.) to values that match `docker-compose.deps-only.yml`.
 ### One-time setup
 From `surfsense_web/`:
 ```bash
-cd surfsense_backend
+pnpm install
 pnpm exec playwright install --with-deps chromium
 ```
 ### Each run
 **1. Bring up Postgres + Redis** from the repo root. The other deps-only
 services (SearXNG, Zero, pgAdmin) are not needed for E2E:
 ```bash
 docker compose -f docker/docker-compose.deps-only.yml up -d db redis
 ```
 **2. Start the backend** in `surfsense_backend/`, terminal A:
 ```bash
 uv sync
 uv run alembic upgrade head
 uv run python tests/e2e/run_backend.py
-# in a second shell:
+```
 **3. Start the Celery worker** in `surfsense_backend/`, terminal B:
 ```bash
 uv run python tests/e2e/run_celery.py
 ```
-Then in `surfsense_web`:
+**4. Register the Playwright user**:
 ```bash
-pnpm test:e2e
+curl -X POST http://localhost:8000/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}'
 ```
 **5. Run Playwright** from `surfsense_web/`, terminal C:
 ```bash
 pnpm test:e2e             # dev server (fast iteration)
 pnpm test:e2e:headed      # show the browser
 pnpm test:e2e:ui          # Playwright UI mode
 pnpm test:e2e:prod        # build + start (matches CI exactly)
 ```
 `playwright.config.ts` and the run scripts share defaults, so this works on a
 fresh checkout. Set `PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`,
 `NEXT_PUBLIC_FASTAPI_BACKEND_URL`, or any backend env (e.g. `DATABASE_URL`)
 only when pointing tests at a different stack.
 ### Cleanup
 ```bash
 docker compose -f docker/docker-compose.deps-only.yml down
 ```
 Add `-v` to also wipe the Postgres volume.
 ### Hermetic alternative (matches CI)
 To reproduce the CI environment exactly — backend and Celery in containers,
 network egress denied at L3 — replace steps 1–3 with:
 ```bash
 docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
 ```
 Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear
 down with:
 ```bash
 docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
 ```
 This builds the ~9 GB `surfsense-e2e-backend:local` image, so the deps-only
 flow above is faster for day-to-day development.
--- a/surfsense_backend/tests/e2e/auth_mint.py
+++ b/surfsense_backend/tests/e2e/auth_mint.py
@ -0,0 +1,66 @@
 """Test-only token mint endpoint for the E2E backend entrypoint.
 Mounted by ``tests/e2e/run_backend.py`` so Playwright can authenticate
 the seeded e2e user without hitting ``/auth/jwt/login`` (rate-limited
 to 5/min/IP in production). NEVER ships to production: this whole
 ``tests/`` tree is excluded from the production Docker image by
 ``surfsense_backend/.dockerignore``.
 Authn: shared secret in ``X-E2E-Mint-Secret``. Same value is set on the
 backend container env (``docker/docker-compose.e2e.yml``) and exported
 to the Playwright runner (``.github/workflows/e2e-tests.yml``).
 """
 from __future__ import annotations
 import logging
 import os
 from fastapi import APIRouter, FastAPI, Header, HTTPException
 from pydantic import BaseModel
 from sqlalchemy import select
 from app.db import User, async_session_maker
 from app.users import get_jwt_strategy
 _logger = logging.getLogger("surfsense.e2e.auth_mint")
 # Request body accepted by POST /__e2e__/auth/token.
 class MintRequest(BaseModel):
    # Email of the user to mint a token for; looked up in the User table.
    email: str = "e2e-test@surfsense.net"
 # Response body returned by the test-only mint endpoint.
 class MintResponse(BaseModel):
    # Token produced by the JWT strategy's write_token() for the user.
    access_token: str
    # Token scheme reported to the client; defaults to "bearer".
    token_type: str = "bearer"
 def _expected_secret() -> str:
    return os.environ.get("E2E_MINT_SECRET", "local-e2e-mint-secret-not-for-production")
 router = APIRouter(prefix="/__e2e__", tags=["__e2e__"])
@router.post("/auth/token", response_model=MintResponse)
async def mint_test_token(
    body: MintRequest,
    x_e2e_mint_secret: str = Header(..., alias="X-E2E-Mint-Secret"),
) -> MintResponse:
    """Mint a JWT for an existing user without going through the login route.

    Args:
        body: Request payload naming the user by email.
        x_e2e_mint_secret: Value of the ``X-E2E-Mint-Secret`` header; must
            equal the secret returned by ``_expected_secret()``.

    Returns:
        A ``MintResponse`` carrying the freshly written access token.

    Raises:
        HTTPException: 403 when the header secret does not match, 404 when
            no user with ``body.email`` exists.
    """
    import hmac

    # Constant-time comparison so the response time does not reveal how much
    # of the secret matched. Compare bytes: compare_digest rejects non-ASCII
    # ``str`` arguments with a TypeError.
    presented = x_e2e_mint_secret.encode("utf-8")
    expected = _expected_secret().encode("utf-8")
    if not hmac.compare_digest(presented, expected):
        raise HTTPException(status_code=403, detail="invalid e2e mint secret")

    async with async_session_maker() as session:
        result = await session.execute(select(User).where(User.email == body.email))
        user = result.scalar_one_or_none()

    if user is None:
        raise HTTPException(
            status_code=404, detail=f"e2e user {body.email!r} not seeded"
        )

    token = await get_jwt_strategy().write_token(user)
    return MintResponse(access_token=token)
 def install(app: FastAPI) -> None:
    """Register the ``/__e2e__`` router on ``app`` and log a warning about it."""
    mounted_notice = "[e2e] mounted POST /__e2e__/auth/token (test-only token mint)"
    app.include_router(router)
    _logger.warning(mounted_notice)
--- a/surfsense_backend/tests/e2e/fakes/docling_service.py
+++ b/surfsense_backend/tests/e2e/fakes/docling_service.py
@ -0,0 +1,141 @@
 """Stub DoclingService.process_document for E2E.
 The real ``DoclingService.process_document`` calls
 ``DocumentConverter.convert(file_path)`` which lazily downloads the
 ``docling-project/docling-layout-heron`` model from Hugging Face Hub.
 The hermetic E2E container sets ``HF_HUB_OFFLINE=1`` (see
 ``docker/docker-compose.e2e.yml``), so that download fails with
 ``LocalEntryNotFoundError`` and the indexing Celery task retries until
 the Playwright test hits its ~4-minute step timeout. In CI that is the
 difference between the suite finishing and the 30-minute job timeout
 killing the run before any report can upload.
 Stubbing ``process_document`` bypasses ``DocumentConverter.convert()``
 entirely. ``DoclingService.__init__`` is intentionally left untouched
 because constructing ``DocumentConverter(...)`` is cheap and offline —
 it is only ``.convert()`` that triggers the offline-model download.
 Every canary PDF under ``tests/e2e/fakes/fixtures/binary/`` is produced
 by ``generate_canary_pdfs.py`` and embeds its canary token as plain
 ``(text) Tj`` PDF text operators. Extracting those operators gives us
 the canary string back, which is what the Playwright assertions look
 for in the resulting Document row.
 """
 from __future__ import annotations
 import logging
 import re
 from pathlib import Path
 from typing import Any
 logger = logging.getLogger(__name__)
 # Matches the `(escaped text) Tj` text-show operator emitted by
 # generate_canary_pdfs.py. Inside the parens, the escape rules are:
 #   \\  -> backslash
 #   \(  -> literal (
 #   \)  -> literal )
 # The character class [^\\()] consumes any non-escape byte; \\. consumes
 # an escape sequence. Sufficient for our synthetic fixtures.
 _TJ_PATTERN = re.compile(rb"\(((?:[^\\()]|\\.)*)\)\s*Tj")
 def _extract_text_from_synthetic_pdf(file_path: str) -> str:
    """Pull every ``(text) Tj`` payload out of a fixture PDF in order.
    Returns an empty string if the file cannot be read. We do not try to
    handle arbitrary PDFs because the fake is only ever invoked against
    fixtures we generate ourselves.
    """
    try:
        data = Path(file_path).read_bytes()
    except OSError as exc:
        logger.warning("[fake-docling] could not read %s: %s", file_path, exc)
        return ""
    lines: list[str] = []
    for match in _TJ_PATTERN.finditer(data):
        raw = match.group(1)
        # Order-sensitive unescape via sentinel: protect `\\` first so
        # the subsequent `\(` / `\)` passes do not corrupt it.
        text = (
            raw.replace(rb"\\", b"\x00")
            .replace(rb"\(", b"(")
            .replace(rb"\)", b")")
            .replace(b"\x00", b"\\")
        )
        try:
            lines.append(text.decode("utf-8"))
        except UnicodeDecodeError:
            lines.append(text.decode("latin-1"))
    return "\n".join(lines)
 async def fake_process_document(
    self,
    file_path: str,
    filename: str | None = None,
 ) -> dict[str, Any]:
    """Stand-in for ``DoclingService.process_document`` used by the E2E fakes.

    ``self`` is accepted only because the function is patched onto the class
    and called as an instance method; it is not used.

    Args:
        file_path: Path of the fixture PDF to read.
        filename: Name to show in the markdown heading; defaults to the
            basename of ``file_path`` when omitted or empty.

    Returns:
        A dict with ``content``, ``full_text``, ``service_used``, ``status``
        and ``processing_notes`` keys, so callers can keep reading
        ``result["content"]`` without changes.
    """
    body = _extract_text_from_synthetic_pdf(file_path)
    display_name = filename if filename else Path(file_path).name
    if not body:
        # Placeholder instead of an exception: a failing canary assertion is
        # a much clearer failure mode than a hard parser error.
        body = "(empty docling fake — no text-show operators found)"
    content = f"# {display_name}\n\n{body}\n"

    logger.info("[fake-docling] returning %d chars for %s", len(content), display_name)

    result: dict[str, Any] = {}
    result["content"] = content
    result["full_text"] = content
    result["service_used"] = "docling-fake"
    result["status"] = "success"
    result["processing_notes"] = "e2e fake DoclingService — no real PDF parsing"
    return result
 def install(patches: list[Any]) -> None:
    """Swap ``DoclingService.process_document`` for the fake, class-wide.

    The patch is started immediately and appended to ``patches`` so the
    caller owns its lifetime. Patching the class attribute (not individual
    call sites) means every instance dispatches to the fake.

    Raises:
        RuntimeError: when the target module or attribute cannot be found.
            This fails loudly on purpose — a silent passthrough would run
            the real Docling under ``HF_HUB_OFFLINE=1`` and hang CI.
    """
    from unittest.mock import patch as _patch

    target = "app.services.docling_service.DoclingService.process_document"
    try:
        active_patch = _patch(target, fake_process_document)
        active_patch.start()
        patches.append(active_patch)
        logger.info("[fake-docling] patched %s", target)
    except (ModuleNotFoundError, AttributeError) as exc:
        message = (
            f"Could not patch Docling binding {target!r}: {exc!s}. "
            f"Update surfsense_backend/tests/e2e/fakes/docling_service.py "
            f"to point at the new binding site."
        )
        raise RuntimeError(message) from exc
--- a/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
+++ b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml
@ -0,0 +1,71 @@
 # Synthetic Global LLM configuration for E2E ONLY.
 #
 # Why this file exists:
 #   surfsense_backend/app/config/global_llm_config.yaml is gitignored
 #   (operators ship real API keys there). In CI that file does not exist,
 #   so app.config.load_global_llm_configs() returns [] and every
 #   chat-stream test fails fast with "No usable global LLM configs are
 #   available for Auto mode", raised by
 #   auto_model_pin_service._global_candidates().
 #
 # What this file does:
 #   tests/e2e/run_backend.py and tests/e2e/run_celery.py copy this file
 #   to app/config/global_llm_config.yaml at startup, BEFORE app.config
 #   is imported. The copy lives only inside the E2E Docker container.
 #
 # Why a fake api_key is safe:
 #   tests.e2e.fakes.chat_llm patches
 #     app.tasks.chat.stream_new_chat.create_chat_litellm_from_agent_config
 #     app.tasks.chat.stream_new_chat.create_chat_litellm_from_config
 #   so the resolved auto-pin id is never sent to a real LLM provider.
 #   The values below only need to pass
 #   auto_model_pin_service._is_usable_global_config()
 #   which requires id / model_name / provider / api_key all truthy.
 #
 # Why TWO entries (premium + free):
 #   auto_model_pin_service.resolve_or_get_pinned_llm_config_id() splits
 #   candidates by billing_tier based on _is_premium_eligible(user):
 #     premium_eligible == True  -> keeps only tier=="premium" configs
 #     premium_eligible == False -> keeps only tier!="premium" configs
 #   A single-tier fixture would fail one of the two branches with
 #   "Auto mode could not find an eligible LLM config for this user and
 #   quota state". Shipping one of each guarantees every quota state
 #   resolves to a viable pin in E2E.
 router_settings:
  routing_strategy: "simple-shuffle"
  num_retries: 0
  allowed_fails: 1
  cooldown_time: 1
 global_llm_configs:
  - id: -9001
    name: "E2E Fake Auto Model (premium)"
    billing_tier: "premium"
    anonymous_enabled: false
    seo_enabled: false
    quality_score: 1.0
    provider: "OPENAI"
    model_name: "fake-e2e-model-premium"
    api_key: "fake-e2e-api-key-not-for-production"
    supports_image_input: false
    quota_reserve_tokens: 1024
    rpm: 1000
    tpm: 100000
    litellm_params:
      model: "openai/fake-e2e-model-premium"
  - id: -9002
    name: "E2E Fake Auto Model (free)"
    billing_tier: "free"
    anonymous_enabled: false
    seo_enabled: false
    quality_score: 1.0
    provider: "OPENAI"
    model_name: "fake-e2e-model-free"
    api_key: "fake-e2e-api-key-not-for-production"
    supports_image_input: false
    quota_reserve_tokens: 1024
    rpm: 1000
    tpm: 100000
    litellm_params:
      model: "openai/fake-e2e-model-free"
--- a/surfsense_backend/tests/e2e/run_backend.py
+++ b/surfsense_backend/tests/e2e/run_backend.py
@ -23,15 +23,12 @@ Usage:
 from __future__ import annotations
 import asyncio
 import logging
 import os
 import sys
-# ---------------------------------------------------------------------------
+import uvicorn
 # 1) Hijack sys.modules BEFORE any production import.
 #    Production: composio_service.py:11 does `from composio import Composio`.
 #    With this hijack in place, that import resolves to our strict fake.
 # ---------------------------------------------------------------------------
 # Make the surfsense_backend root importable as a top-level package so
 # `import tests.e2e.fakes...` works regardless of how the entrypoint is
@ -42,97 +39,175 @@ _BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
 if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)
 import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
 import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402
 sys.modules["composio"] = _fake_composio
 sys.modules["notion_client"] = _fake_notion
 sys.modules["notion_client.errors"] = _fake_notion.errors
 # ---------------------------------------------------------------------------
 # 2) Standard logging + dotenv so the rest of the app behaves like main.py.
 # ---------------------------------------------------------------------------
 from dotenv import load_dotenv  # noqa: E402
 load_dotenv()
 os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
 os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
 os.environ.setdefault(
    "CONFLUENCE_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/confluence/connector/callback",
 )
 os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
 os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
 os.environ.setdefault(
    "NOTION_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/notion/connector/callback",
 )
 os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
 os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
 os.environ.setdefault(
    "ONEDRIVE_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
 )
 os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
 os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
 os.environ.setdefault(
    "DROPBOX_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
 )
 os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
 os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
 )
 logger = logging.getLogger("surfsense.e2e.backend")
 logger.warning(
    "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***"
 )
 # ---------------------------------------------------------------------------
 # 3) Now import the production app. Every module in app.* loads here,
 #    creating their bindings (some of which we will patch in step 4).
 # ---------------------------------------------------------------------------
 # ---------------------------------------------------------------------------
 # 4) Patch LLM + embedding bindings at every consumer site.
 #    Composio is already covered by the sys.modules hijack in step 1.
 # ---------------------------------------------------------------------------
 from unittest.mock import patch  # noqa: E402
 from app.app import app  # noqa: E402
 from tests.e2e.fakes import (  # noqa: E402
    clickup_module as _fake_clickup_module,
    confluence_indexer as _fake_confluence_indexer,
    confluence_oauth as _fake_confluence_oauth,
    dropbox_api as _fake_dropbox_api,
    embeddings as _fake_embeddings,
    jira_module as _fake_jira_module,
    linear_module as _fake_linear_module,
    mcp_oauth_runtime as _fake_mcp_oauth_runtime,
    mcp_runtime as _fake_mcp_runtime,
    native_google as _fake_native_google,
    notion_module as _fake_notion_module,
    onedrive_graph as _fake_onedrive_graph,
    slack_module as _fake_slack_module,
 )
 from tests.e2e.fakes.chat_llm import (  # noqa: E402
    fake_create_chat_litellm_from_agent_config,
    fake_create_chat_litellm_from_config,
 )
 from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402
 # Patches started during bootstrap are kept alive for the lifetime of the
 # process. We never call .stop() on them.
 _active_patches: list = []
 def _hijack_external_sdks() -> None:
    """Replace composio + notion_client in sys.modules.
    Production does ``from composio import Composio`` and
    ``import notion_client`` at import time. With this hijack in place,
    those imports resolve to our strict fakes.
    MUST run before _import_production_app().
    """
    import tests.e2e.fakes.composio_module as _fake_composio
    import tests.e2e.fakes.notion_module as _fake_notion
    sys.modules["composio"] = _fake_composio
    sys.modules["notion_client"] = _fake_notion
    sys.modules["notion_client.errors"] = _fake_notion.errors
 def _load_dotenv_and_set_env_defaults() -> None:
    """Load ``.env`` and backfill every env var the production config reads.

    Each fallback below is applied with ``setdefault``, so a value that is
    already present in the environment after ``load_dotenv()`` is kept.
    The two Slack credentials are the exception: they are assigned
    unconditionally.

    MUST run before _import_production_app(), since app.config consumes
    these values at import time.
    """
    from dotenv import load_dotenv

    load_dotenv()

    fallbacks = (
        (
            "DATABASE_URL",
            "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
        ),
        ("CELERY_BROKER_URL", "redis://localhost:6379/0"),
        ("CELERY_RESULT_BACKEND", "redis://localhost:6379/0"),
        ("REDIS_APP_URL", "redis://localhost:6379/0"),
        ("CELERY_TASK_DEFAULT_QUEUE", "surfsense"),
        ("SECRET_KEY", "local-e2e-secret-not-for-production"),
        ("AUTH_TYPE", "LOCAL"),
        ("REGISTRATION_ENABLED", "TRUE"),
        ("ETL_SERVICE", "DOCLING"),
        ("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
        ("NEXT_FRONTEND_URL", "http://localhost:3000"),
        # Sentinel keys — fakes never read them; turns leaked real calls into 401s.
        ("COMPOSIO_API_KEY", "local-deny-real-call-sentinel"),
        ("COMPOSIO_ENABLED", "TRUE"),
        ("OPENAI_API_KEY", "local-deny-real-call-sentinel"),
        ("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel"),
        ("LITELLM_API_KEY", "local-deny-real-call-sentinel"),
        ("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id"),
        ("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret"),
        (
            "CONFLUENCE_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/confluence/connector/callback",
        ),
        ("NOTION_CLIENT_ID", "fake-notion-client-id"),
        ("NOTION_CLIENT_SECRET", "fake-notion-client-secret"),
        (
            "NOTION_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/notion/connector/callback",
        ),
        ("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id"),
        ("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret"),
        (
            "ONEDRIVE_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
        ),
        ("DROPBOX_APP_KEY", "fake-dropbox-app-key"),
        ("DROPBOX_APP_SECRET", "fake-dropbox-app-secret"),
        (
            "DROPBOX_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
        ),
        # Native Google OAuth — fake Flow in tests.e2e.fakes.native_google
        # raises "Fake Google Flow requires redirect_uri." if these are empty,
        # so connector/add routes return 500 in CI where no .env supplies them.
        (
            "GOOGLE_DRIVE_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/google/drive/connector/callback",
        ),
        (
            "GOOGLE_GMAIL_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
        ),
        (
            "GOOGLE_CALENDAR_REDIRECT_URI",
            "http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
        ),
    )
    for env_name, fallback in fallbacks:
        os.environ.setdefault(env_name, fallback)

    # Unlike everything above, the Slack credentials replace whatever the
    # environment or .env supplied.
    # NOTE(review): presumably the Slack fake expects these exact values — confirm.
    forced = {
        "SLACK_CLIENT_ID": "fake-slack-mcp-client-id",
        "SLACK_CLIENT_SECRET": "fake-slack-mcp-client-secret",
    }
    for env_name, forced_value in forced.items():
        os.environ[env_name] = forced_value
 def _install_synthetic_global_llm_config() -> None:
    """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.

    The real file is gitignored (production operators ship their own with
    real API keys), so a fresh CI checkout has no YAML at the path
    ``app.config.load_global_llm_configs()`` reads. With an empty
    ``GLOBAL_LLM_CONFIGS`` list, ``auto_model_pin_service`` raises
    ``"No usable global LLM configs are available for Auto mode"`` on
    every chat-stream request.

    We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
    production-expected location BEFORE ``_import_production_app()`` so
    ``app.config`` picks it up on import. Production code is untouched —
    this is purely a test-time scaffold.

    Only installs when the destination is missing. A developer running
    the E2E entrypoint locally keeps their real ``global_llm_config.yaml``
    intact (the patched ``create_chat_litellm_from_*`` factories make the
    actual model values irrelevant either way).

    The copy is staged under a process-unique name next to the destination
    and published with ``os.replace``. The E2E Celery entrypoint installs
    the same destination and skips when the path already exists, so the
    final path must never be visible while half-written.

    MUST run before _import_production_app().

    Raises:
        RuntimeError: the checked-in fixture is missing.
    """
    import shutil

    src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
    dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml")
    if not os.path.exists(src):
        raise RuntimeError(
            f"E2E synthetic global LLM config fixture missing at {src!r}. "
            f"This file is checked into tests/e2e/fixtures/ — if it has gone "
            f"missing, restore it from VCS before running the E2E entrypoint."
        )
    if os.path.exists(dst):
        logger.info(
            "[e2e-global-llm-config] %s already exists; leaving it alone "
            "(local dev config preserved)",
            dst,
        )
        return
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    # Stage in the destination directory so the final rename stays on one
    # filesystem; the pid keeps concurrent installers from sharing a name.
    staging = f"{dst}.{os.getpid()}.tmp"
    try:
        shutil.copyfile(src, staging)
        os.replace(staging, dst)
    finally:
        # Only still present when the copy or the rename failed.
        if os.path.exists(staging):
            os.remove(staging)
    logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
 def _import_production_app():
    """Import ``app.app`` and hand back its ``app`` object.

    This import is what loads every module under ``app.*`` and creates
    their bindings. The LLM/embedding factories captured at this point
    are replaced afterwards by the patches in _patch_llm_bindings().

    Returns:
        The production FastAPI app exported by ``app.app``.
    """
    from app.app import app as fastapi_app

    return fastapi_app
 def _patch_llm_bindings() -> None:
    """Replace LLM factories at every known binding site."""
    from unittest.mock import patch
    from tests.e2e.fakes.chat_llm import (
        fake_create_chat_litellm_from_agent_config,
        fake_create_chat_litellm_from_config,
    )
    from tests.e2e.fakes.llm import fake_get_user_long_context_llm
    targets = [
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
@ -190,38 +265,90 @@ def _patch_llm_bindings() -> None:
            logger.warning("[fake-chat-llm] could not patch %s: %s.", target, exc)
-_patch_llm_bindings()
+def _install_runtime_fakes() -> None:
-_fake_embeddings.install(_active_patches)
+    """Run each fake's install() against the active patch stack."""
-_fake_confluence_oauth.install(_active_patches)
+    from tests.e2e.fakes import (
-_fake_confluence_indexer.install(_active_patches)
+        clickup_module as _fake_clickup_module,
-_fake_native_google.install(_active_patches)
+        confluence_indexer as _fake_confluence_indexer,
-_fake_onedrive_graph.install(_active_patches)
+        confluence_oauth as _fake_confluence_oauth,
-_fake_dropbox_api.install(_active_patches)
+        docling_service as _fake_docling_service,
-_fake_notion_module.install(_active_patches)
+        dropbox_api as _fake_dropbox_api,
-_fake_linear_module.install(_active_patches)
+        embeddings as _fake_embeddings,
-_fake_jira_module.install(_active_patches)
+        jira_module as _fake_jira_module,
-_fake_clickup_module.install(_active_patches)
+        linear_module as _fake_linear_module,
-_fake_mcp_runtime.install(_active_patches)
+        mcp_oauth_runtime as _fake_mcp_oauth_runtime,
-_fake_mcp_oauth_runtime.install(_active_patches)
+        mcp_runtime as _fake_mcp_runtime,
-_fake_slack_module.install(_active_patches)
+        native_google as _fake_native_google,
        notion_module as _fake_notion_module,
        onedrive_graph as _fake_onedrive_graph,
        slack_module as _fake_slack_module,
    )
    _fake_embeddings.install(_active_patches)
    _fake_docling_service.install(_active_patches)
    _fake_confluence_oauth.install(_active_patches)
    _fake_confluence_indexer.install(_active_patches)
    _fake_native_google.install(_active_patches)
    _fake_onedrive_graph.install(_active_patches)
    _fake_dropbox_api.install(_active_patches)
    _fake_notion_module.install(_active_patches)
    _fake_linear_module.install(_active_patches)
    _fake_jira_module.install(_active_patches)
    _fake_clickup_module.install(_active_patches)
    _fake_mcp_runtime.install(_active_patches)
    _fake_mcp_oauth_runtime.install(_active_patches)
    _fake_slack_module.install(_active_patches)
-# ---------------------------------------------------------------------------
+def _install_test_only_app_extensions(app) -> None:
-# 5) Mount test-only middleware. Production never reaches this code.
+    """Mount test-only middleware + the /__e2e__ token mint router.
 # ---------------------------------------------------------------------------
-from tests.e2e.middleware.scenario import ScenarioMiddleware  # noqa: E402
+    POST /__e2e__/auth/token bypasses /auth/jwt/login's 5/min/IP rate
    limit so Playwright workers can authenticate without thrashing the
    production auth surface. See tests/e2e/auth_mint.py.
    """
    from tests.e2e.auth_mint import install as install_e2e_mint
    from tests.e2e.middleware.scenario import ScenarioMiddleware
-app.add_middleware(ScenarioMiddleware)
+    app.add_middleware(ScenarioMiddleware)
    install_e2e_mint(app)
-# ---------------------------------------------------------------------------
+def _bootstrap():
-# 6) Start uvicorn, mirroring main.py's behaviour.
+    """Run the full E2E bootstrap and return the production FastAPI app.
 # ---------------------------------------------------------------------------
-import asyncio  # noqa: E402
+    Ordering is load-bearing:
      1) Hijack composio + notion_client in sys.modules.
      2) Load .env + set env defaults (app.config reads env on import).
      3) Configure logging.
      4) Materialise the synthetic global_llm_config.yaml so Auto-mode
         pin resolution finds at least one usable candidate.
      5) Import production app (which transitively imports the now-faked
         external SDKs and reads the env defaults + YAML).
      6) Patch LLM / embedding bindings at every consumer site.
      7) Mount test-only middleware + /__e2e__ routes onto the app.
    """
    _hijack_external_sdks()
    _load_dotenv_and_set_env_defaults()
-import uvicorn  # noqa: E402
+    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logger.warning(
        "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***"
    )
    _install_synthetic_global_llm_config()
    production_app = _import_production_app()
    _patch_llm_bindings()
    _install_runtime_fakes()
    _install_test_only_app_extensions(production_app)
    return production_app
 app = _bootstrap()
 def _main() -> None:
--- a/surfsense_backend/tests/e2e/run_celery.py
+++ b/surfsense_backend/tests/e2e/run_celery.py
@ -25,96 +25,166 @@ if _BACKEND_ROOT not in sys.path:
    sys.path.insert(0, _BACKEND_ROOT)
 # ---------------------------------------------------------------------------
 # 1) Hijack sys.modules BEFORE production celery imports anything.
 # ---------------------------------------------------------------------------
 import tests.e2e.fakes.composio_module as _fake_composio  # noqa: E402
 import tests.e2e.fakes.notion_module as _fake_notion  # noqa: E402
 sys.modules["composio"] = _fake_composio
 sys.modules["notion_client"] = _fake_notion
 sys.modules["notion_client.errors"] = _fake_notion.errors
 # ---------------------------------------------------------------------------
 # 2) Logging + dotenv.
 # ---------------------------------------------------------------------------
 from dotenv import load_dotenv  # noqa: E402
 load_dotenv()
 os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
 os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
 os.environ.setdefault(
    "CONFLUENCE_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/confluence/connector/callback",
 )
 os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id")
 os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret")
 os.environ.setdefault(
    "NOTION_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/notion/connector/callback",
 )
 os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id")
 os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret")
 os.environ.setdefault(
    "ONEDRIVE_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
 )
 os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key")
 os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret")
 os.environ.setdefault(
    "DROPBOX_REDIRECT_URI",
    "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
 )
 os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id"
 os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret"
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
 )
 logger = logging.getLogger("surfsense.e2e.celery")
 logger.warning("*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***")
 # ---------------------------------------------------------------------------
 # 3) Import the production celery_app. All task modules load here.
 # ---------------------------------------------------------------------------
 # ---------------------------------------------------------------------------
 # 4) Patch LLM + embedding bindings inside the worker process.
 # ---------------------------------------------------------------------------
 from unittest.mock import patch  # noqa: E402
 from app.celery_app import celery_app  # noqa: E402
 from tests.e2e.fakes import (  # noqa: E402
    clickup_module as _fake_clickup_module,
    confluence_indexer as _fake_confluence_indexer,
    confluence_oauth as _fake_confluence_oauth,
    dropbox_api as _fake_dropbox_api,
    embeddings as _fake_embeddings,
    jira_module as _fake_jira_module,
    linear_module as _fake_linear_module,
    mcp_oauth_runtime as _fake_mcp_oauth_runtime,
    mcp_runtime as _fake_mcp_runtime,
    native_google as _fake_native_google,
    notion_module as _fake_notion_module,
    onedrive_graph as _fake_onedrive_graph,
    slack_module as _fake_slack_module,
 )
 from tests.e2e.fakes.chat_llm import (  # noqa: E402
    fake_create_chat_litellm_from_agent_config,
    fake_create_chat_litellm_from_config,
 )
 from tests.e2e.fakes.llm import fake_get_user_long_context_llm  # noqa: E402
 # Patches started during bootstrap are kept alive for the lifetime of the
 # process. We never call .stop() on them.
 _active_patches: list = []
 def _hijack_external_sdks() -> None:
    """Point the ``composio`` and ``notion_client`` import names at E2E fakes.

    Production does ``from composio import Composio`` and
    ``import notion_client`` at import time. Once the three entries below
    are registered in ``sys.modules``, those imports resolve to our
    strict fakes.

    MUST run before _import_celery_app().
    """
    import tests.e2e.fakes.composio_module as composio_stub
    import tests.e2e.fakes.notion_module as notion_stub

    sys.modules.update(
        {
            "composio": composio_stub,
            "notion_client": notion_stub,
            # The submodule name is registered too, served by the fake's
            # ``errors`` attribute.
            "notion_client.errors": notion_stub.errors,
        }
    )
 def _load_dotenv_and_set_env_defaults() -> None:
    """Load ``.env`` and backfill every env var the production config reads.

    Each fallback is applied with ``setdefault``, so anything already in
    the environment after ``load_dotenv()`` is left alone. Only the two
    Slack credentials at the end are assigned unconditionally.

    MUST run before _import_celery_app(), since app.config consumes
    these values at import time.
    """
    from dotenv import load_dotenv

    load_dotenv()

    env_fallbacks = {
        "DATABASE_URL": "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
        "CELERY_BROKER_URL": "redis://localhost:6379/0",
        "CELERY_RESULT_BACKEND": "redis://localhost:6379/0",
        "REDIS_APP_URL": "redis://localhost:6379/0",
        "CELERY_TASK_DEFAULT_QUEUE": "surfsense",
        "SECRET_KEY": "local-e2e-secret-not-for-production",
        "AUTH_TYPE": "LOCAL",
        "REGISTRATION_ENABLED": "TRUE",
        "ETL_SERVICE": "DOCLING",
        "EMBEDDING_MODEL": "sentence-transformers/all-MiniLM-L6-v2",
        "NEXT_FRONTEND_URL": "http://localhost:3000",
        # Sentinel keys — fakes never read them; turns leaked real calls into 401s.
        "COMPOSIO_API_KEY": "local-deny-real-call-sentinel",
        "COMPOSIO_ENABLED": "TRUE",
        "OPENAI_API_KEY": "local-deny-real-call-sentinel",
        "ANTHROPIC_API_KEY": "local-deny-real-call-sentinel",
        "LITELLM_API_KEY": "local-deny-real-call-sentinel",
        "ATLASSIAN_CLIENT_ID": "fake-atlassian-client-id",
        "ATLASSIAN_CLIENT_SECRET": "fake-atlassian-client-secret",
        "CONFLUENCE_REDIRECT_URI": "http://localhost:8000/api/v1/auth/confluence/connector/callback",
        "NOTION_CLIENT_ID": "fake-notion-client-id",
        "NOTION_CLIENT_SECRET": "fake-notion-client-secret",
        "NOTION_REDIRECT_URI": "http://localhost:8000/api/v1/auth/notion/connector/callback",
        "MICROSOFT_CLIENT_ID": "fake-microsoft-client-id",
        "MICROSOFT_CLIENT_SECRET": "fake-microsoft-client-secret",
        "ONEDRIVE_REDIRECT_URI": "http://localhost:8000/api/v1/auth/onedrive/connector/callback",
        "DROPBOX_APP_KEY": "fake-dropbox-app-key",
        "DROPBOX_APP_SECRET": "fake-dropbox-app-secret",
        "DROPBOX_REDIRECT_URI": "http://localhost:8000/api/v1/auth/dropbox/connector/callback",
        # Native Google OAuth — fake Flow in tests.e2e.fakes.native_google raises
        # "Fake Google Flow requires redirect_uri." when these are empty.
        "GOOGLE_DRIVE_REDIRECT_URI": "http://localhost:8000/api/v1/auth/google/drive/connector/callback",
        "GOOGLE_GMAIL_REDIRECT_URI": "http://localhost:8000/api/v1/auth/google/gmail/connector/callback",
        "GOOGLE_CALENDAR_REDIRECT_URI": "http://localhost:8000/api/v1/auth/google/calendar/connector/callback",
    }
    for variable, fallback in env_fallbacks.items():
        os.environ.setdefault(variable, fallback)

    # These two overwrite any existing value rather than defaulting.
    # NOTE(review): presumably the Slack fake expects these exact values — confirm.
    os.environ.update(
        {
            "SLACK_CLIENT_ID": "fake-slack-mcp-client-id",
            "SLACK_CLIENT_SECRET": "fake-slack-mcp-client-secret",
        }
    )
 def _install_synthetic_global_llm_config() -> None:
    """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E.

    The real file is gitignored (production operators ship their own with
    real API keys), so a fresh CI checkout has no YAML at the path
    ``app.config.load_global_llm_configs()`` reads. With an empty
    ``GLOBAL_LLM_CONFIGS`` list, the worker's view of the config diverges
    from the API container.

    We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the
    production-expected location BEFORE _import_celery_app() so
    ``app.config`` picks it up on import. Install-only-if-missing so a
    developer's local config (with real API keys) is preserved.

    The copy is staged under a process-unique name next to the destination
    and published with ``os.replace``. The E2E backend entrypoint installs
    the same destination and skips when the path already exists, so the
    final path must never be visible while half-written.

    MUST run before _import_celery_app().

    Raises:
        RuntimeError: the checked-in fixture is missing.
    """
    import shutil

    src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml")
    dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml")
    if not os.path.exists(src):
        raise RuntimeError(
            f"E2E synthetic global LLM config fixture missing at {src!r}. "
            f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS."
        )
    if os.path.exists(dst):
        logger.info(
            "[e2e-global-llm-config] %s already exists; leaving it alone "
            "(local dev config preserved)",
            dst,
        )
        return
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    # Stage in the destination directory so the final rename stays on one
    # filesystem; the pid keeps concurrent installers from sharing a name.
    staging = f"{dst}.{os.getpid()}.tmp"
    try:
        shutil.copyfile(src, staging)
        os.replace(staging, dst)
    finally:
        # Only still present when the copy or the rename failed.
        if os.path.exists(staging):
            os.remove(staging)
    logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst)
 def _import_celery_app():
    """Import ``app.celery_app`` and hand back its ``celery_app`` object.

    This import is what loads every module under ``app.*`` (including all
    task modules) and creates their bindings. The LLM/embedding factories
    captured at this point are replaced afterwards by the patches in
    _patch_llm_bindings().

    Returns:
        The production Celery app exported by ``app.celery_app``.
    """
    from app.celery_app import celery_app as production_celery_app

    return production_celery_app
 def _patch_llm_bindings() -> None:
    """Replace LLM factories at every known binding site in worker tasks."""
    from unittest.mock import patch
    from tests.e2e.fakes.chat_llm import (
        fake_create_chat_litellm_from_agent_config,
        fake_create_chat_litellm_from_config,
    )
    from tests.e2e.fakes.llm import fake_get_user_long_context_llm
    targets = [
        "app.services.llm_service.get_user_long_context_llm",
        "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm",
@ -172,38 +242,93 @@ def _patch_llm_bindings() -> None:
            )
-_patch_llm_bindings()
+def _install_runtime_fakes() -> None:
-_fake_embeddings.install(_active_patches)
+    """Run each fake's install() against the active patch stack."""
-_fake_confluence_oauth.install(_active_patches)
+    from tests.e2e.fakes import (
-_fake_confluence_indexer.install(_active_patches)
+        clickup_module as _fake_clickup_module,
-_fake_native_google.install(_active_patches)
+        confluence_indexer as _fake_confluence_indexer,
-_fake_onedrive_graph.install(_active_patches)
+        confluence_oauth as _fake_confluence_oauth,
-_fake_dropbox_api.install(_active_patches)
+        docling_service as _fake_docling_service,
-_fake_notion_module.install(_active_patches)
+        dropbox_api as _fake_dropbox_api,
-_fake_linear_module.install(_active_patches)
+        embeddings as _fake_embeddings,
-_fake_jira_module.install(_active_patches)
+        jira_module as _fake_jira_module,
-_fake_clickup_module.install(_active_patches)
+        linear_module as _fake_linear_module,
-_fake_mcp_runtime.install(_active_patches)
+        mcp_oauth_runtime as _fake_mcp_oauth_runtime,
-_fake_mcp_oauth_runtime.install(_active_patches)
+        mcp_runtime as _fake_mcp_runtime,
-_fake_slack_module.install(_active_patches)
+        native_google as _fake_native_google,
        notion_module as _fake_notion_module,
        onedrive_graph as _fake_onedrive_graph,
        slack_module as _fake_slack_module,
    )
    _fake_embeddings.install(_active_patches)
    _fake_docling_service.install(_active_patches)
    _fake_confluence_oauth.install(_active_patches)
    _fake_confluence_indexer.install(_active_patches)
    _fake_native_google.install(_active_patches)
    _fake_onedrive_graph.install(_active_patches)
    _fake_dropbox_api.install(_active_patches)
    _fake_notion_module.install(_active_patches)
    _fake_linear_module.install(_active_patches)
    _fake_jira_module.install(_active_patches)
    _fake_clickup_module.install(_active_patches)
    _fake_mcp_runtime.install(_active_patches)
    _fake_mcp_oauth_runtime.install(_active_patches)
    _fake_slack_module.install(_active_patches)
-# ---------------------------------------------------------------------------
+def _bootstrap():
-# 5) Start the worker.
+    """Run the full E2E bootstrap and return the production Celery app.
-# ---------------------------------------------------------------------------
+
    Ordering is load-bearing:
      1) Hijack composio + notion_client in sys.modules.
      2) Load .env + set env defaults (app.config reads env on import).
      3) Configure logging.
      4) Materialise the synthetic global_llm_config.yaml so the worker's
         view of GLOBAL_LLM_CONFIGS matches the API container.
      5) Import production celery_app (which transitively imports the
         now-faked external SDKs and reads the env defaults + YAML).
      6) Patch LLM / embedding bindings at every consumer site.
      7) Install runtime fakes for connectors and chat backends.
    """
    _hijack_external_sdks()
    _load_dotenv_and_set_env_defaults()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logger.warning(
        "*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***"
    )
    _install_synthetic_global_llm_config()
    celery_app = _import_celery_app()
    _patch_llm_bindings()
    _install_runtime_fakes()
    return celery_app
 celery_app = _bootstrap()
 def _main() -> None:
    # Default queues mirror production (default queue + connectors queue
    # so Drive indexing tasks are picked up).
    queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
    queues = f"{queue_name},{queue_name}.connectors"
    # macOS forks-after-MPS-init crash prefork workers; threads avoid it.
    default_pool = "threads" if sys.platform == "darwin" else "prefork"
    pool = os.getenv("CELERY_POOL", default_pool)
    concurrency = os.getenv("CELERY_CONCURRENCY", "2")
    celery_app.worker_main(
        argv=[
            "worker",
            "--loglevel=info",
            f"--queues={queues}",
-            "--concurrency=2",
+            f"--pool={pool}",
            f"--concurrency={concurrency}",
            "--without-gossip",
            "--without-mingle",
        ]
--- a/surfsense_desktop/package.json
+++ b/surfsense_desktop/package.json
@ -21,7 +21,7 @@
    "email": "rohan@surfsense.com"
  },
  "license": "MIT",
-  "packageManager": "pnpm@10.24.0",
+  "packageManager": "pnpm@10.26.0",
  "devDependencies": {
    "@electron/rebuild": "^4.0.3",
    "@types/node": "^25.5.0",
--- a/surfsense_web/.gitignore
+++ b/surfsense_web/.gitignore
@ -12,6 +12,10 @@
 # testing
 /coverage
 /playwright/.auth/
 /playwright-report/
 /test-results/
 /blob-report/
 # next.js
 /.next/
@ -48,5 +52,4 @@ next-env.d.ts
 # source
 /.source/
-.pnpm-store/
+.pnpm-store/
--- a/surfsense_web/Dockerfile
+++ b/surfsense_web/Dockerfile
@ -12,7 +12,7 @@ WORKDIR /app
 RUN corepack enable pnpm
 # Copy package files
-COPY package.json pnpm-lock.yaml* .npmrc* ./
+COPY package.json pnpm-lock.yaml* pnpm-workspace.yaml* .npmrc* ./
 # First copy the config file and content to avoid fumadocs-mdx postinstall error
 COPY source.config.ts ./
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -208,7 +208,10 @@ const MentionedDocumentInfoSchema = z.object({
 	id: z.number(),
 	title: z.string(),
 	document_type: z.string(),
-	kind: z.union([z.literal("doc"), z.literal("folder")]).optional().default("doc"),
+	kind: z
 		.union([z.literal("doc"), z.literal("folder")])
 		.optional()
 		.default("doc"),
 });
 const MentionedDocumentsPartSchema = z.object({
@ -1029,9 +1032,7 @@ export default function NewChatPage() {
 							mentioned_surfsense_doc_ids: hasSurfsenseDocIds
 								? mentionedDocumentIds.surfsense_doc_ids
 								: undefined,
-							mentioned_folder_ids: hasFolderIds
+							mentioned_folder_ids: hasFolderIds ? mentionedDocumentIds.folder_ids : undefined,
 								? mentionedDocumentIds.folder_ids
 								: undefined,
 							// Full mention metadata (docs + folders, with
 							// ``kind`` discriminator) so the BE can embed a
 							// ``mentioned-documents`` ContentPart on the
@ -1900,12 +1901,10 @@ export default function NewChatPage() {
 					filesystem_mode: selection.filesystem_mode,
 					client_platform: selection.client_platform,
 					local_filesystem_mounts: selection.local_filesystem_mounts,
-					mentioned_document_ids:
+					mentioned_document_ids: regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
 						regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
 					mentioned_surfsense_doc_ids:
 						regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined,
-					mentioned_folder_ids:
+					mentioned_folder_ids: regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
 						regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
 					// Full mention metadata for the regenerate-specific
 					// source list. Only meaningful for edit (the BE only
 					// re-persists a user row when ``user_query`` is set);
--- a/surfsense_web/atoms/chat/mentioned-documents.atom.ts
+++ b/surfsense_web/atoms/chat/mentioned-documents.atom.ts
@ -97,9 +97,7 @@ export const mentionedDocumentIdsAtom = atom((get) => {
 		surfsense_doc_ids: docs
 			.filter((doc) => doc.document_type === "SURFSENSE_DOCS")
 			.map((doc) => doc.id),
-		document_ids: docs
+		document_ids: docs.filter((doc) => doc.document_type !== "SURFSENSE_DOCS").map((doc) => doc.id),
 			.filter((doc) => doc.document_type !== "SURFSENSE_DOCS")
 			.map((doc) => doc.id),
 		folder_ids: folders.map((f) => f.id),
 	};
 });
--- a/surfsense_web/biome.json
+++ b/surfsense_web/biome.json
@ -7,7 +7,19 @@
 	},
 	"files": {
 		"ignoreUnknown": true,
-		"includes": ["**", "!!node_modules", "!!.git", "!!.next", "!!dist", "!!build", "!!coverage"],
+		"includes": [
 			"**",
 			"!!node_modules",
 			"!!.git",
 			"!!.next",
 			"!!dist",
 			"!!build",
 			"!!coverage",
 			"!!test-results",
 			"!!playwright-report",
 			"!!blob-report",
 			"!!playwright/.auth"
 		],
 		"maxSize": 1048576
 	},
 	"formatter": {
--- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx
+++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx
@ -47,10 +47,7 @@ export interface InlineMentionEditorRef {
 	setText: (text: string) => void;
 	getText: () => string;
 	getMentionedDocuments: () => MentionedDocument[];
-	insertMentionChip: (
+	insertMentionChip: (mention: MentionChipInput, options?: { removeTriggerText?: boolean }) => void;
 		mention: MentionChipInput,
 		options?: { removeTriggerText?: boolean }
 	) => void;
 	/**
 	 * @deprecated Use ``insertMentionChip``. Kept for one transition
 	 * cycle so we don't break ad-hoc callers; prefer the new name.
@ -364,8 +361,7 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
 				const selection = editor.selection;
 				const kind: MentionKind = mention.kind ?? "doc";
 				const document_type =
-					mention.document_type ??
+					mention.document_type ?? (kind === "folder" ? FOLDER_MENTION_DOCUMENT_TYPE : undefined);
 					(kind === "folder" ? FOLDER_MENTION_DOCUMENT_TYPE : undefined);
 				const mentionNode: MentionElementNode = {
 					type: MENTION_TYPE,
 					id: mention.id,
--- a/surfsense_web/components/assistant-ui/markdown-text.tsx
+++ b/surfsense_web/components/assistant-ui/markdown-text.tsx
@ -33,8 +33,8 @@ import {
 } from "@/components/ui/table";
 import { useElectronAPI } from "@/hooks/use-platform";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
 import { getVirtualPathDisplay } from "@/lib/chat/virtual-path-display";
 import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
 import { cn } from "@/lib/utils";
 function MarkdownCodeBlockSkeleton() {
@ -222,11 +222,7 @@ function FilePathLink({ path, className }: { path: string; className?: string })
 		: undefined;
 	const { displayName, isFolder } = getVirtualPathDisplay(path);
-	const icon = isFolder ? (
+	const icon = isFolder ? <FolderIcon className="size-3.5" /> : <FileIcon className="size-3.5" />;
 		<FolderIcon className="size-3.5" />
 	) : (
 		<FileIcon className="size-3.5" />
 	);
 	const handleClick = useCallback(
 		(event: React.MouseEvent<HTMLButtonElement>) => {
--- a/surfsense_web/components/assistant-ui/user-message.tsx
+++ b/surfsense_web/components/assistant-ui/user-message.tsx
@ -111,11 +111,7 @@ const UserTextPart: FC = () => {
 						icon={icon}
 						label={segment.doc.title}
 						tooltip={isFolder ? `Folder: ${segment.doc.title}` : segment.doc.title}
-						onClick={
+						onClick={isFolder ? undefined : () => handleOpenDoc(segment.doc.id, segment.doc.title)}
 							isFolder
 								? undefined
 								: () => handleOpenDoc(segment.doc.id, segment.doc.title)
 						}
 						className="mx-0.5"
 					/>
 				);
--- a/surfsense_web/components/editor/plate-editor.tsx
+++ b/surfsense_web/components/editor/plate-editor.tsx
@ -170,16 +170,10 @@ export function PlateEditor({
 			: markdown
 				? (editor) => {
 						if (!enableCitations) {
-							return safeDeserializeMarkdown(
+							return safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown)) as Value;
 								editor,
 								escapeMdxExpressions(markdown)
 							) as Value;
 						}
 						const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
-						const value = safeDeserializeMarkdown(
+						const value = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten));
 							editor,
 							escapeMdxExpressions(rewritten)
 						);
 						return injectCitationNodes(value, urlMap) as Value;
 					}
 				: undefined,
@ -203,10 +197,7 @@ export function PlateEditor({
 			let newValue: Descendant[];
 			if (enableCitations) {
 				const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
-				const deserialized = safeDeserializeMarkdown(
+				const deserialized = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten));
 					editor,
 					escapeMdxExpressions(rewritten)
 				);
 				newValue = injectCitationNodes(deserialized, urlMap);
 			} else {
 				newValue = safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown));
--- a/surfsense_web/components/editor/utils/safe-deserialize.ts
+++ b/surfsense_web/components/editor/utils/safe-deserialize.ts
@ -49,10 +49,7 @@ export function safeDeserializeMarkdown(
 		return api.deserialize(markdown, { remarkPlugins: STRICT_PLUGINS }) as Descendant[];
 	} catch (mdxError) {
 		if (process.env.NODE_ENV !== "production") {
-			console.warn(
+			console.warn("[plate-editor] MDX parse failed, retrying without remark-mdx:", mdxError);
 				"[plate-editor] MDX parse failed, retrying without remark-mdx:",
 				mdxError
 			);
 		}
 		try {
 			return api.deserialize(markdown, { remarkPlugins: LENIENT_PLUGINS }) as Descendant[];
--- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx
+++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx
@ -24,10 +24,7 @@ import type React from "react";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { toast } from "sonner";
 import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom";
-import {
+import { makeFolderMention, mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom";
 	makeFolderMention,
 	mentionedDocumentsAtom,
 } from "@/atoms/chat/mentioned-documents.atom";
 import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
 import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms";
 import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
--- a/surfsense_web/components/new-chat/document-mention-picker.tsx
+++ b/surfsense_web/components/new-chat/document-mention-picker.tsx
@ -301,8 +301,7 @@ export const DocumentMentionPicker = forwardRef<
 	// folder entries lift the existing kind-aware key so the same
 	// matchers used by the chip atom apply unchanged.
 	const selectedKeys = useMemo(
-		() =>
+		() => new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
 			new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
 		[initialSelectedDocuments]
 	);
@ -583,9 +582,7 @@ export const DocumentMentionPicker = forwardRef<
 								{(surfsenseDocsList.length > 0 || userDocsList.length > 0) && (
 									<div className="mx-2 my-4 border-t border-border dark:border-white/5" />
 								)}
-								<div className="px-3 py-2 text-xs font-bold text-muted-foreground/55">
+								<div className="px-3 py-2 text-xs font-bold text-muted-foreground/55">Folders</div>
 									Folders
 								</div>
 								{folderMentions.map((folder) => {
 									const folderKey = getMentionDocKey(folder);
 									const isAlreadySelected = selectedKeys.has(folderKey);
--- a/surfsense_web/package.json
+++ b/surfsense_web/package.json
@ -2,6 +2,7 @@
 	"name": "surfsense_web",
 	"version": "0.0.23",
 	"private": true,
 	"packageManager": "pnpm@10.26.0",
 	"description": "SurfSense Frontend",
 	"scripts": {
 		"dev": "next dev --turbopack",
@ -20,6 +21,7 @@
 		"db:studio": "drizzle-kit studio",
 		"format:fix": "npx @biomejs/biome check --fix",
 		"test:e2e": "playwright test",
 		"test:e2e:prod": "cross-env CI=1 playwright test",
 		"test:e2e:ui": "playwright test --ui",
 		"test:e2e:headed": "playwright test --headed",
 		"test:e2e:debug": "playwright test --debug",
--- a/surfsense_web/playwright.config.ts
+++ b/surfsense_web/playwright.config.ts
@ -4,6 +4,11 @@ const PORT = process.env.PORT || "3000";
 const BACKEND_PORT = process.env.BACKEND_PORT || "8000";
 const baseURL = process.env.PLAYWRIGHT_BASE_URL || `http://localhost:${PORT}`;
 process.env.PLAYWRIGHT_TEST_EMAIL ??= "e2e-test@surfsense.net";
 process.env.PLAYWRIGHT_TEST_PASSWORD ??= "E2eTestPassword123!";
 process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ??= `http://localhost:${BACKEND_PORT}`;
 process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE ??= "LOCAL";
 /**
 * Playwright configuration for SurfSense web E2E tests.
 *
@ -22,8 +27,8 @@ export default defineConfig({
 	expect: { timeout: 15_000 },
 	fullyParallel: true,
 	forbidOnly: !!process.env.CI,
-	retries: process.env.CI ? 2 : 0,
+	retries: process.env.CI ? 1 : 0,
-	workers: process.env.CI ? 1 : undefined,
+	workers: 1,
 	reporter: process.env.CI
 		? [["html", { open: "never" }], ["github"], ["list"]]
 		: [["html", { open: "on-failure" }], ["list"]],
@ -31,7 +36,7 @@ export default defineConfig({
 		baseURL,
 		trace: "on-first-retry",
 		screenshot: "only-on-failure",
-		video: "retain-on-failure",
+		video: process.env.CI ? "off" : "retain-on-failure",
 		extraHTTPHeaders: {
 			"x-playwright-test": "true",
 		},
@ -53,14 +58,16 @@ export default defineConfig({
 	webServer: process.env.PLAYWRIGHT_NO_WEB_SERVER
 		? undefined
 		: {
-				// Pin to webpack dev (Turbopack has caused stale-lock panics in E2E).
+				// Local stays on webpack dev (Turbopack caused stale-lock panics in E2E).
-				command: "pnpm exec next dev",
+				command: process.env.CI ? "pnpm build && pnpm start" : "pnpm exec next dev",
 				url: `http://localhost:${PORT}`,
 				reuseExistingServer: !process.env.CI,
-				timeout: 180_000,
+				timeout: process.env.CI ? 300_000 : 180_000,
 				stdout: "pipe",
 				stderr: "pipe",
 				env: {
-					NEXT_PUBLIC_FASTAPI_BACKEND_URL: `http://localhost:${BACKEND_PORT}`,
+					NEXT_PUBLIC_FASTAPI_BACKEND_URL: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL,
-					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: "LOCAL",
+					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE,
 				},
 			},
 });
--- a/surfsense_web/pnpm-workspace.yaml
+++ b/surfsense_web/pnpm-workspace.yaml
@ -0,0 +1,11 @@
 allowBuilds:
  "@parcel/watcher": true
  "@rocicorp/zero-sqlite3": true
  "@swc/core": true
  core-js: true
  esbuild: true
  protobufjs: true
  sharp: true
  unrs-resolver: true
 minimumReleaseAge: 10080
--- a/surfsense_web/tests/README.md
+++ b/surfsense_web/tests/README.md
@ -5,29 +5,6 @@ Celery + Postgres + Redis). Designed to scale from one connector
 (Composio Drive in Phase 1) to every connector + manual file upload
 without rewriting the harness.
 ## Layout
 ```
 tests/
 ├── auth.setup.ts                    # one-time login, persists localStorage
 ├── smoke/                           # tracer-bullet tests (dashboard renders)
 ├── connectors/
 │   └── composio/
 │       └── drive/                   # Composio Google Drive — Phase 1
 │           └── journey.spec.ts      # connect -> select -> index -> canary assertion
 ├── fixtures/                        # test.extend() fixtures
 │   ├── index.ts                     # named `test` exports per spec category
 │   ├── search-space.fixture.ts      # apiToken + per-test search space
 │   └── connectors/
 │       └── composio-drive.fixture.ts
 ├── helpers/                         # reusable building blocks
 │   ├── api/                         # backend HTTP helpers
 │   ├── ui/                          # page-object selectors
 │   ├── waits/                       # deterministic polling
 │   └── canary.ts                    # canary tokens + fixed Drive file ids
 └── README.md                        # this file
 ```
 ## How the deterministic harness works
 There are **three layers of defense** against accidental real-world
@ -47,26 +24,90 @@ calls. None of them touch production code.
 ## Running locally
 The recommended flow runs only Postgres and Redis in Docker, with the backend
 and Celery worker on the host. The E2E entrypoints `setdefault` every backend
 variable they need, so no `.env` file is required on a fresh checkout.
 ### One-time setup
 From `surfsense_web/`:
 ```bash
-# 1. Bring up Postgres + Redis (Docker compose, supabase, whatever you use)
+pnpm install
-docker compose up -d postgres redis
+pnpm exec playwright install --with-deps chromium
 # 2. Backend with E2E entrypoint (note: NOT `uv run main.py`)
 cd surfsense_backend
 uv run alembic upgrade head
 uv run python tests/e2e/run_backend.py &
 # 3. Celery worker with the same entrypoint pattern
 uv run python tests/e2e/run_celery.py &
 # 4. Run Playwright tests (auto-starts `pnpm dev` via webServer config)
 cd ../surfsense_web
 pnpm test:e2e
 ```
-For CI behavior in one go: `pnpm test:e2e:headless`.
+### Each run
-To debug the Drive journey: `pnpm test:e2e -- connectors/composio/drive/journey.spec.ts --headed`.
+**1. Bring up Postgres + Redis** from the repo root:
 ```bash
 docker compose -f docker/docker-compose.deps-only.yml up -d db redis
 ```
 **2. Start the backend** in `surfsense_backend/`, terminal A:
 ```bash
 uv sync
 uv run alembic upgrade head
 uv run python tests/e2e/run_backend.py
 ```
 **3. Start the Celery worker** in `surfsense_backend/`, terminal B:
 ```bash
 uv run python tests/e2e/run_celery.py
 ```
 **4. Register the Playwright user**:
 ```bash
 curl -X POST http://localhost:8000/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}'
 ```
 **5. Run Playwright** from `surfsense_web/`, terminal C:
 ```bash
 pnpm test:e2e             # dev server (fast iteration)
 pnpm test:e2e:headed      # show the browser
 pnpm test:e2e:ui          # Playwright UI mode
 pnpm test:e2e:debug       # Playwright Inspector
 pnpm test:e2e:prod        # build + start (matches CI exactly)
 pnpm test:e2e:report      # open the last HTML report
 ```
 `playwright.config.ts` and the backend run scripts share defaults, so the
 above works without exporting any env vars. Override
 `PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`, or
 `NEXT_PUBLIC_FASTAPI_BACKEND_URL` only when pointing tests at a different
 stack.
 To debug a single journey:
 ```bash
 pnpm test:e2e:headed connectors/composio/drive/journey.spec.ts
 ```
 ### Hermetic alternative (matches CI)
 To reproduce the CI environment exactly (backend and Celery in containers
 with L3 egress denied), replace steps 1–3 with:
 ```bash
 docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
 ```
 Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear
 down with:
 ```bash
 docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
 ```
 This builds the ~9 GB e2e backend image, so the deps-only flow is faster for
 day-to-day work.
 ## Adding a new connector
--- a/surfsense_web/tests/auth.setup.ts
+++ b/surfsense_web/tests/auth.setup.ts
@ -1,47 +1,21 @@
 import path from "node:path";
 import { expect, test as setup } from "@playwright/test";
 import { acquireTestToken } from "./helpers/api/auth";
 /**
- * One-time authentication setup. Logs in via the FastAPI backend directly
+ * One-time authentication setup. Acquires a bearer token for the seeded
- * (skipping the UI) and persists the resulting localStorage token so every
+ * e2e user (rate-limit-free /__e2e__/auth/token first, /auth/jwt/login
- * test in the chromium project starts already authenticated.
+ * fallback) and persists it via localStorage so every test in the
- *
+ * chromium project starts already authenticated.
 * Mirrors the real auth flow in `lib/apis/auth-api.service.ts`:
 *   POST /auth/jwt/login  ->  { access_token }
 *   localStorage.setItem("surfsense_bearer_token", access_token)
 *
 * Requires a seeded test user in the dev/test DB. Configure via env:
 *   PLAYWRIGHT_TEST_EMAIL, PLAYWRIGHT_TEST_PASSWORD
 *   NEXT_PUBLIC_FASTAPI_BACKEND_URL  (defaults to http://localhost:8000)
 */
 const authFile = path.join(__dirname, "..", "playwright", ".auth", "user.json");
 const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
 const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
 const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
 const STORAGE_KEY = "surfsense_bearer_token";
 setup("authenticate", async ({ page, request }) => {
-	const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, {
+	const access_token = await acquireTestToken(request);
-		form: {
+	expect(access_token, "Failed to acquire e2e bearer token").toBeTruthy();
 			username: TEST_USER_EMAIL,
 			password: TEST_USER_PASSWORD,
 			grant_type: "password",
 		},
 		headers: { "Content-Type": "application/x-www-form-urlencoded" },
 	});
 	expect(
 		response.ok(),
 		`Login to ${BACKEND_URL}/auth/jwt/login failed (${response.status()}). ` +
 			`Check that the backend is running and that PLAYWRIGHT_TEST_EMAIL ` +
 			`(${TEST_USER_EMAIL}) is seeded with PLAYWRIGHT_TEST_PASSWORD. ` +
 			`Body: ${await response.text()}`
 	).toBeTruthy();
 	const { access_token } = (await response.json()) as { access_token: string };
 	expect(access_token, "Backend response missing access_token").toBeTruthy();
 	await page.addInitScript(
 		({ key, token }) => {
--- a/surfsense_web/tests/documents/file-upload/journey.spec.ts
+++ b/surfsense_web/tests/documents/file-upload/journey.spec.ts
@ -107,14 +107,14 @@ test.describe("Manual file upload journey", () => {
 		});
 	});
-	test("user uploads a PDF (DOCUMENT branch via real Docling)", async ({
+	test("user uploads a PDF (DOCUMENT branch)", async ({
 		page,
 		request,
 		apiToken,
 		searchSpace,
 		chatThread,
 	}) => {
-		test.setTimeout(240_000); // Docling cold-start can take 30-60s on first invocation.
+		test.setTimeout(180_000);
 		await uploadAndAssert({
 			page,
--- a/surfsense_web/tests/fixtures/search-space.fixture.ts
+++ b/surfsense_web/tests/fixtures/search-space.fixture.ts
@ -1,5 +1,7 @@
 import fs from "node:fs";
 import path from "node:path";
 import { test as base } from "@playwright/test";
-import { loginAsTestUser } from "../helpers/api/auth";
+import { acquireTestToken } from "../helpers/api/auth";
 import {
 	createSearchSpace,
 	deleteSearchSpace,
@ -20,12 +22,45 @@ export type SearchSpaceFixtures = {
 	searchSpace: SearchSpaceRow;
 };
 const STORAGE_KEY = "surfsense_bearer_token";
 // Reuse the token written by tests/auth.setup.ts; on cache miss we
 // acquire a fresh one via acquireTestToken (rate-limit-free
 // /__e2e__/auth/token first, /auth/jwt/login as fallback).
 const AUTH_STATE_PATH = path.join(__dirname, "..", "..", "playwright", ".auth", "user.json");
 /**
 * Looks up a previously persisted bearer token in the auth state file at
 * AUTH_STATE_PATH.
 *
 * @returns The first non-empty localStorage value stored under STORAGE_KEY,
 *   or null when the file cannot be read or parsed, or holds no such entry.
 */
 function loadCachedBearerToken(): string | null {
 	type StoredEntry = { name?: string; value?: string };
 	type StoredOrigin = { origin?: string; localStorage?: StoredEntry[] };
 	try {
 		const state = JSON.parse(fs.readFileSync(AUTH_STATE_PATH, "utf8")) as {
 			origins?: StoredOrigin[];
 		};
 		for (const site of state.origins ?? []) {
 			const entries = site.localStorage ?? [];
 			for (const item of entries) {
 				// Skip entries under other keys and entries with an empty value.
 				if (item.name !== STORAGE_KEY || !item.value) {
 					continue;
 				}
 				return item.value;
 			}
 		}
 	} catch {
 		// Unreadable or malformed state file: treat it as a cache miss so the
 		// caller falls back to a fresh login.
 	}
 	return null;
 }
 export const searchSpaceFixtures = base.extend<SearchSpaceFixtures, { apiTokenWorker: string }>({
 	apiTokenWorker: [
 		async ({ playwright }, use) => {
 			const cached = loadCachedBearerToken();
 			if (cached) {
 				await use(cached);
 				return;
 			}
 			const ctx = await playwright.request.newContext();
 			try {
-				const token = await loginAsTestUser(ctx);
+				const token = await acquireTestToken(ctx);
 				await use(token);
 			} finally {
 				await ctx.dispose();
--- a/surfsense_web/tests/helpers/api/auth.ts
+++ b/surfsense_web/tests/helpers/api/auth.ts
@ -11,8 +11,39 @@ import type { APIRequestContext } from "@playwright/test";
 export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
-const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
+const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "e2e-test@surfsense.net";
-const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
+const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "E2eTestPassword123!";
 const E2E_MINT_SECRET = process.env.E2E_MINT_SECRET || "local-e2e-mint-secret-not-for-production";
 /**
 * Obtains a JWT for the seeded e2e user from the test-only
 * `/__e2e__/auth/token` endpoint (mounted by
 * surfsense_backend/tests/e2e/run_backend.py). Because it sidesteps the
 * production /auth/jwt/login rate limit (5/min/IP), it can be called from
 * any worker or retry. A backend without the endpoint mounted (i.e. a
 * non-e2e backend) answers 404.
 *
 * @param request - Playwright request context used to issue the POST.
 * @param email - Account to mint a token for; defaults to TEST_USER_EMAIL.
 * @returns The `access_token` field of the JSON response.
 * @throws Error when the response is not OK (the message embeds the HTTP
 *   status in parentheses followed by the response body), or when the
 *   response carries no `access_token`.
 */
 export async function mintTestToken(
 	request: APIRequestContext,
 	email: string = TEST_USER_EMAIL
 ): Promise<string> {
 	const mintUrl = `${BACKEND_URL}/__e2e__/auth/token`;
 	const reply = await request.post(mintUrl, {
 		headers: {
 			"Content-Type": "application/json",
 			"X-E2E-Mint-Secret": E2E_MINT_SECRET,
 		},
 		data: { email },
 	});
 	if (!reply.ok()) {
 		const status = reply.status();
 		const body = await reply.text();
 		// Callers inspect the "(<status>)" marker in this message; keep its format.
 		throw new Error(`Mint token at ${mintUrl} failed (${status}): ${body}`);
 	}
 	const payload = (await reply.json()) as { access_token: string };
 	if (!payload.access_token) {
 		throw new Error("Mint response missing access_token");
 	}
 	return payload.access_token;
 }
 export async function loginAsTestUser(request: APIRequestContext): Promise<string> {
 	const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, {
@ -37,6 +68,23 @@ export async function loginAsTestUser(request: APIRequestContext): Promise<strin
 	return access_token;
 }
 /**
 * Resolves a bearer token for the e2e user. The rate-limit-free mint
 * endpoint is tried first; if that attempt fails with a message containing
 * "(404)" or "(405)" (the e2e endpoint isn't mounted, e.g. when running
 * against a non-e2e backend in local dev), it falls back to
 * /auth/jwt/login. Any other mint failure is rethrown unchanged.
 *
 * @param request - Playwright request context used for the HTTP calls.
 * @returns The acquired access token.
 */
 export async function acquireTestToken(request: APIRequestContext): Promise<string> {
 	let minted: string;
 	try {
 		minted = await mintTestToken(request);
 	} catch (cause) {
 		const detail = cause instanceof Error ? cause.message : String(cause);
 		const endpointMissing = ["(404)", "(405)"].some((marker) => detail.includes(marker));
 		if (!endpointMissing) {
 			throw cause;
 		}
 		return loginAsTestUser(request);
 	}
 	return minted;
 }
 /**
 * Standard auth headers for backend API calls. Optionally injects an
 * X-E2E-Scenario header that the test-only ScenarioMiddleware in