chore: implement E2E testing setup with Docker Compose and update workflow for backend and Redis services

2026-05-17 18:35:19 +02:00 · 2026-05-11 03:09:01 +05:30 · 2026-05-11 03:09:01 +05:30 · 68f45335bc
commit 68f45335bc
parent 2c8828f60c
9 changed files with 433 additions and 233 deletions
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@ -7,6 +7,7 @@ on:
    paths:
      - 'surfsense_web/**'
      - 'surfsense_backend/**'
+      - 'docker/docker-compose.e2e.yml'
      - '.github/workflows/e2e-tests.yml'
  workflow_dispatch:

@ -19,173 +20,36 @@ jobs:
    name: Journey
    runs-on: ubuntu-latest
    if: github.event.pull_request.draft == false
-    timeout-minutes: 45
-
-    # Postgres runs as a step (not a service)
-    services:
-      redis:
-        image: redis:8-alpine
-        ports:
-          - 6379:6379
-        options: >-
-          --health-cmd "redis-cli ping"
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
+    timeout-minutes: 30

    env:
-      DATABASE_URL: postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_e2e
-      CELERY_BROKER_URL: redis://localhost:6379/0
-      CELERY_RESULT_BACKEND: redis://localhost:6379/0
-      REDIS_APP_URL: redis://localhost:6379/0
-      SECRET_KEY: ci-test-secret-key-not-for-production
-      AUTH_TYPE: LOCAL
-      REGISTRATION_ENABLED: "TRUE"
-      ETL_SERVICE: DOCLING
-      EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
-      NEXT_FRONTEND_URL: http://localhost:3000
-
-      # Sentinel keys — fakes never read them; turns leaked real calls into 401s.
-      COMPOSIO_API_KEY: e2e-deny-real-call-sentinel
-      COMPOSIO_ENABLED: "TRUE"
-      OPENAI_API_KEY: e2e-deny-real-call-sentinel
-      ANTHROPIC_API_KEY: e2e-deny-real-call-sentinel
-      LITELLM_API_KEY: e2e-deny-real-call-sentinel
-
-      MICROSOFT_CLIENT_ID: fake-microsoft-client-id
-      MICROSOFT_CLIENT_SECRET: fake-microsoft-client-secret
-      ONEDRIVE_REDIRECT_URI: http://localhost:8000/api/v1/auth/onedrive/connector/callback
-      DROPBOX_APP_KEY: fake-dropbox-app-key
-      DROPBOX_APP_SECRET: fake-dropbox-app-secret
-      DROPBOX_REDIRECT_URI: http://localhost:8000/api/v1/auth/dropbox/connector/callback
-
+      # Test user that the backend creates via /auth/register before Playwright runs.
+      PLAYWRIGHT_TEST_EMAIL: e2e-test@surfsense.net
+      PLAYWRIGHT_TEST_PASSWORD: E2eTestPassword123!
+      # Frontend env: Playwright's webServer (surfsense_web/playwright.config.ts)
+      # spawns `pnpm build && pnpm start` in CI; these get baked into the build.
      NEXT_PUBLIC_FASTAPI_BACKEND_URL: http://localhost:8000
      NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: LOCAL

-      PLAYWRIGHT_TEST_EMAIL: e2e-test@surfsense.net
-      PLAYWRIGHT_TEST_PASSWORD: E2eTestPassword123!
-
    steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v6

-      # Started early so it warms up while Python deps install.
-      - name: Start Postgres
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # ─── Backend stack ─────────────────────────────────────────────────
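+      # Builds the e2e image (multi-stage, deps cached via GHA), brings up
+      # db + redis + backend + celery_worker, blocks until every healthcheck
+      # is green. No `uv` invocation on the runner; no PID files; no curl
+      # polling loops; readiness is gated by Docker healthchecks.
+      # NOTE(review): the `type=gha` cache entries in docker-compose.e2e.yml
+      # need the GitHub Actions runtime cache URL and token in this step's
+      # environment. A plain `run:` step does not export them, so the cache
+      # may be skipped silently — confirm cache hits in the build log, or
+      # expose the runtime variables before this step.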
+      - name: Build & start backend stack
        run: |
-          docker run -d \
-            --name surfsense_postgres \
-            -p 5432:5432 \
-            -e POSTGRES_USER=postgres \
-            -e POSTGRES_PASSWORD=postgres \
-            -e POSTGRES_DB=surfsense_e2e \
-            pgvector/pgvector:pg17 \
-            postgres \
-              -c wal_level=logical \
-              -c max_wal_senders=10 \
-              -c max_replication_slots=10
+          docker compose -f docker/docker-compose.e2e.yml \
+            up -d --build --wait --wait-timeout 300

-      - name: Set up Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v8.1.0
-
-      - name: Cache backend dependencies
-        uses: actions/cache@v5
-        with:
-          path: |
-            ~/.cache/uv
-            surfsense_backend/.venv
-          key: python-deps-${{ hashFiles('surfsense_backend/uv.lock') }}
-          restore-keys: |
-            python-deps-
-
-      - name: Cache HuggingFace models
-        uses: actions/cache@v5
-        with:
-          path: ~/.cache/huggingface
-          key: hf-models-${{ env.EMBEDDING_MODEL }}-${{ env.ETL_SERVICE }}
-
-      - name: Install backend dependencies
-        working-directory: surfsense_backend
-        run: uv sync
-
-      - name: Wait for Postgres readiness
-        run: |
-          for i in $(seq 1 30); do
-            if docker exec surfsense_postgres pg_isready -U postgres -d surfsense_e2e > /dev/null 2>&1; then
-              echo "Postgres ready after ${i} attempts"
-              exit 0
-            fi
-            sleep 2
-          done
-          echo "::error::Postgres failed to become ready within 60s"
-          docker logs surfsense_postgres --tail 100
-          exit 1
-
-      - name: Run database migrations
-        working-directory: surfsense_backend
-        run: uv run alembic upgrade head
-
-      # Do NOT replace with `uvicorn main:app`. run_backend.py hijacks
-      # sys.modules["composio"] before app import; production binds it
-      # at import time so plain uvicorn would call the real SDK.
-      - name: Start backend (E2E entrypoint with sys.modules hijack)
-        working-directory: surfsense_backend
-        env:
-          HTTPS_PROXY: http://127.0.0.1:1
-          HTTP_PROXY: http://127.0.0.1:1
-          NO_PROXY: localhost,127.0.0.1,0.0.0.0,huggingface.co,*.huggingface.co,*.hf.co,cdn-lfs.huggingface.co
-        run: |
-          uv run python tests/e2e/run_backend.py \
-            > backend.log 2>&1 &
-          echo $! > backend.pid
-
-      # Worker is a separate interpreter, so the composio hijack must be reapplied.
-      - name: Start Celery worker (E2E entrypoint)
-        working-directory: surfsense_backend
-        env:
-          HTTPS_PROXY: http://127.0.0.1:1
-          HTTP_PROXY: http://127.0.0.1:1
-          NO_PROXY: localhost,127.0.0.1,0.0.0.0,huggingface.co,*.huggingface.co,*.hf.co,cdn-lfs.huggingface.co
-        run: |
-          uv run python tests/e2e/run_celery.py \
-            > celery.log 2>&1 &
-          echo $! > celery.pid
-
-      - name: Wait for backend readiness
-        run: |
-          for i in $(seq 1 60); do
-            if curl -sf http://localhost:8000/openapi.json > /dev/null; then
-              echo "Backend up after ${i} attempts"
-              exit 0
-            fi
-            sleep 2
-          done
-          echo "::error::Backend failed to start within 120s"
-          echo "===== backend.log (tail 200) ====="
-          tail -200 surfsense_backend/backend.log || true
-          echo "===== celery.log (tail 200) ====="
-          tail -200 surfsense_backend/celery.log || true
-          exit 1
-
-      - name: Wait for Celery worker readiness
-        working-directory: surfsense_backend
-        run: |
-          for i in $(seq 1 30); do
-            if uv run celery -A app.celery_app inspect ping --timeout 2 \
-                > /dev/null 2>&1; then
-              echo "Celery worker up after ${i} attempts"
-              exit 0
-            fi
-            sleep 2
-          done
-          echo "::error::Celery worker failed to start within 60s"
-          echo "===== celery.log (tail 200) ====="
-          tail -200 celery.log || true
-          exit 1
+      - name: Show backend stack status
+        if: always()
+        run: docker compose -f docker/docker-compose.e2e.yml ps

      - name: Register E2E test user
        run: |
@ -201,13 +65,14 @@ jobs:
            exit 1
          fi

-      - name: Setup Node.js
-        uses: actions/setup-node@v6
+      # ─── Frontend (host-side) ──────────────────────────────────────────
+      # Playwright's webServer block in playwright.config.ts spawns
+      # `pnpm build && pnpm start` in CI mode and waits for :3000.
+      - uses: actions/setup-node@v6
        with:
          node-version: '20'

-      - name: Install pnpm
-        uses: pnpm/action-setup@v6
+      - uses: pnpm/action-setup@v6
        with:
          version: 10

@ -221,8 +86,7 @@ jobs:
        with:
          path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
          key: pnpm-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}
-          restore-keys: |
-            pnpm-${{ runner.os }}-
+          restore-keys: pnpm-${{ runner.os }}-

      - name: Install web dependencies
        working-directory: surfsense_web
@ -253,10 +117,26 @@ jobs:
          restore-keys: |
            nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-

+      # ─── Tests ─────────────────────────────────────────────────────────
      - name: Run Playwright tests
        working-directory: surfsense_web
        run: pnpm test:e2e:prod

+      # ─── Failure diagnostics ───────────────────────────────────────────
+      - name: Dump backend stack logs on failure
+        if: failure()
+        run: |
+          mkdir -p ./compose-logs
+          docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps \
+            > ./compose-logs/all-services.log 2>&1 || true
+          for svc in db redis backend celery_worker; do
+            docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps "$svc" \
+              > "./compose-logs/${svc}.log" 2>&1 || true
+          done
+          docker compose -f docker/docker-compose.e2e.yml ps \
+            > ./compose-logs/ps.txt 2>&1 || true
+
+      # ─── Artifacts ─────────────────────────────────────────────────────
      - name: Upload Playwright HTML report
        if: always()
        uses: actions/upload-artifact@v7
@ -273,26 +153,15 @@ jobs:
          path: surfsense_web/test-results/
          retention-days: 14

-      - name: Upload backend + celery logs
+      - name: Upload backend stack logs
        if: failure()
        uses: actions/upload-artifact@v7
        with:
-          name: backend-celery-logs
-          path: |
-            surfsense_backend/backend.log
-            surfsense_backend/celery.log
+          name: backend-stack-logs
+          path: ./compose-logs/
          retention-days: 7

-      - name: Stop backend + Celery worker
+      # ─── Teardown ──────────────────────────────────────────────────────
+      - name: Tear down backend stack
        if: always()
-        working-directory: surfsense_backend
-        run: |
-          for f in backend.pid celery.pid; do
-            if [ -f "$f" ]; then
-              kill "$(cat $f)" 2>/dev/null || true
-            fi
-          done
-
-      - name: Stop Postgres
-        if: always()
-        run: docker rm -f surfsense_postgres 2>/dev/null || true
+        run: docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans
--- a/docker/docker-compose.e2e.yml
+++ b/docker/docker-compose.e2e.yml
@ -0,0 +1,168 @@
+# =============================================================================
+# SurfSense — E2E Docker Compose stack
+# =============================================================================
+# Hermetic backend stack for Playwright E2E tests:
+#   - db / redis on an internal-only network (no internet egress)
+#   - backend (FastAPI) joins the internal network AND a separate ingress
+#     bridge so the host runner can reach :8000
+#   - celery_worker on the internal network only — zero egress surface
+#
+# The backend image is built from surfsense_backend/Dockerfile target=e2e,
+# which adds tests/ via the `tests-source` additional context (tests/ is
+# excluded from the main context by .dockerignore so production never ships
+# test fakes). See surfsense_backend/Dockerfile for stage layout.
+#
+# Usage from repo root:
+#   docker compose -f docker/docker-compose.e2e.yml up -d --build --wait
+#   curl -X POST http://localhost:8000/auth/register ...
+#   ( run Playwright on host, pointing at localhost:8000 + localhost:3000 )
+#   docker compose -f docker/docker-compose.e2e.yml down -v
+# =============================================================================
+
+name: surfsense-e2e
+
+x-backend-env: &backend-env
+  DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/surfsense_e2e
+  CELERY_BROKER_URL: redis://redis:6379/0
+  CELERY_RESULT_BACKEND: redis://redis:6379/0
+  REDIS_APP_URL: redis://redis:6379/0
+  CELERY_TASK_DEFAULT_QUEUE: surfsense
+  SECRET_KEY: ci-test-secret-key-not-for-production
+  AUTH_TYPE: LOCAL
+  REGISTRATION_ENABLED: "TRUE"
+  ETL_SERVICE: DOCLING
+  EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
+  NEXT_FRONTEND_URL: http://host.docker.internal:3000
+  # Sentinel keys — fakes never read them; turns leaked real calls into 401s.
+  COMPOSIO_API_KEY: e2e-deny-real-call-sentinel
+  COMPOSIO_ENABLED: "TRUE"
+  OPENAI_API_KEY: e2e-deny-real-call-sentinel
+  ANTHROPIC_API_KEY: e2e-deny-real-call-sentinel
+  LITELLM_API_KEY: e2e-deny-real-call-sentinel
+  MICROSOFT_CLIENT_ID: fake-microsoft-client-id
+  MICROSOFT_CLIENT_SECRET: fake-microsoft-client-secret
+  ONEDRIVE_REDIRECT_URI: http://localhost:8000/api/v1/auth/onedrive/connector/callback
+  DROPBOX_APP_KEY: fake-dropbox-app-key
+  DROPBOX_APP_SECRET: fake-dropbox-app-secret
+  DROPBOX_REDIRECT_URI: http://localhost:8000/api/v1/auth/dropbox/connector/callback
+  # Defense-in-depth: even though L3 egress is denied for the worker via
+  # `internal: true`, the backend still has a route via `ingress`. Setting
+  # HTTPS_PROXY to an unreachable port turns any leaked Python outbound HTTP
+  # call into a fast Connection refused. UNLIKE the old runner-shell setup,
+  # this proxy is set on the container env and `uv` is never invoked here,
+  # so there is no interaction with uv's implicit-sync behaviour.
+  HTTPS_PROXY: http://127.0.0.1:1
+  HTTP_PROXY: http://127.0.0.1:1
+  NO_PROXY: localhost,127.0.0.1,0.0.0.0,db,redis,host.docker.internal
+
+services:
+  db:
+    image: pgvector/pgvector:pg17
+    command: >
+      postgres
+        -c wal_level=logical
+        -c max_wal_senders=10
+        -c max_replication_slots=10
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: surfsense_e2e
+    # Ephemeral storage — every CI run gets a clean DB, no volume cleanup needed.
+    tmpfs:
+      - /var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres -d surfsense_e2e"]
+      interval: 2s
+      timeout: 3s
+      retries: 30
+    networks: [internal]
+
+  redis:
+    image: redis:8-alpine
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 2s
+      timeout: 3s
+      retries: 30
+    networks: [internal]
+
+  backend:
+    build:
+      context: ../surfsense_backend
+      dockerfile: Dockerfile
+      target: e2e
+      additional_contexts:
+        # tests/ is excluded from the main context by .dockerignore;
+        # the e2e stage's `COPY --from=tests-source` pulls it in here.
+        tests-source: ../surfsense_backend/tests
+      cache_from:
+        - type=gha,scope=surfsense-e2e-backend
+      cache_to:
+        - type=gha,mode=max,scope=surfsense-e2e-backend
+    image: surfsense-e2e-backend:local
+    environment:
+      <<: *backend-env
+      SERVICE_ROLE: api
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    ports:
+      - "8000:8000"
+    depends_on:
+      db: { condition: service_healthy }
+      redis: { condition: service_healthy }
+    healthcheck:
+      # Use Python (already in the image) instead of curl/wget to avoid
+      # depending on either tool being installed in the runtime layers.
+      test:
+        - CMD
+        - python
+        - -c
+        - |
+          import sys, urllib.request
+          try:
+              r = urllib.request.urlopen("http://localhost:8000/openapi.json", timeout=2)
+              sys.exit(0 if r.status == 200 else 1)
+          except Exception:
+              sys.exit(1)
+      interval: 3s
+      timeout: 5s
+      retries: 60
+      start_period: 30s
+    networks:
+      - internal      # to reach db/redis
+      - ingress       # so host can reach :8000
+
+  celery_worker:
+    image: surfsense-e2e-backend:local
+    pull_policy: never
+    # No build: section — reuses the image built by the `backend` service.
+    # `pull_policy: never` makes Compose use that locally built tag instead
+    # of trying to pull `surfsense-e2e-backend:local` from a registry.
+    environment:
+      <<: *backend-env
+      SERVICE_ROLE: worker
+    depends_on:
+      backend: { condition: service_healthy }
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - "celery -A app.celery_app inspect ping --timeout 2 | grep -q pong"
+      interval: 5s
+      timeout: 5s
+      retries: 12
+      start_period: 20s
+    networks: [internal]
+
+networks:
+  # Internal network: containers attached only to this network have NO route
+  # to the host or the internet. This is the L3 deny-egress mechanism that
+  # replaces the fragile HTTPS_PROXY-on-the-runner approach.
+  internal:
+    driver: bridge
+    internal: true
+
+  # Regular bridge network. Only the `backend` service joins it, solely so
+  # the host can reach :8000 via the published port. celery_worker / db /
+  # redis stay off this network entirely.
+  ingress:
+    driver: bridge
--- a/surfsense_backend/Dockerfile
+++ b/surfsense_backend/Dockerfile
@ -1,8 +1,23 @@
-FROM python:3.12-slim
+# =============================================================================
+# SurfSense Backend — Multi-stage Dockerfile
+# =============================================================================
+# Stages:
+#   base       — system deps + Pandoc 3.x
+#   deps       — Python deps frozen from uv.lock (no dev deps)
+#   models     — pre-baked offline assets (EasyOCR, Docling, Playwright)
+#   e2e        — adds tests/ via additional_contexts, swaps entrypoint
+#   production — production runtime (LAST stage = default `docker build` target)
+#
+# IMPORTANT: `production` MUST remain the last stage. .github/workflows/docker-build.yml
+# builds without `target:` and BuildKit defaults to the last stage. Reordering will
+# silently break ghcr.io/modsetter/surfsense-backend.
+# =============================================================================
+
+# ─── Stage 1: base (system deps, Pandoc, certificates) ──────────────────────
+FROM python:3.12-slim AS base

 WORKDIR /app

-# Install system dependencies including SSL tools, CUDA dependencies, and Tesseract OCR
 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    python3-dev \
@ -22,21 +37,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    && rm -rf /var/lib/apt/lists/*

-# Install Pandoc 3.x from GitHub as a fallback for Linux where pypandoc_binary
-# may not bundle pandoc (apt ships 2.17 which has broken table rendering).
-# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks this up.
+# Pandoc 3.x from GitHub Releases — apt ships 2.17 which has broken table rendering.
+# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks up this binary.
 RUN ARCH=$(dpkg --print-architecture) && \
    wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
    dpkg -i /tmp/pandoc.deb && \
    rm /tmp/pandoc.deb

-# Update certificates and install SSL tools
 RUN update-ca-certificates
 RUN pip install --upgrade certifi pip-system-certs

-# Copy requirements
-COPY pyproject.toml .
-COPY uv.lock .
+ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+
+
+# ─── Stage 2: deps (Python deps frozen from uv.lock) ────────────────────────
+FROM base AS deps
+
+COPY pyproject.toml uv.lock ./

 # Install all Python dependencies from uv.lock for deterministic builds.
 #
@ -49,9 +67,7 @@ COPY uv.lock .
 # Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
 # PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
 # nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
-# captured in uv.lock). Installing from cu121 first only wasted ~2GB of
-# downloads that the lock-based install immediately replaced. If a specific
-# CUDA version is needed (driver compatibility, etc.), wire it through
+# captured in uv.lock). If a specific CUDA version is needed, wire it through
 # [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
 RUN pip install --no-cache-dir uv && \
    uv export --frozen --no-dev --no-hashes --no-emit-project \
@ -59,49 +75,32 @@ RUN pip install --no-cache-dir uv && \
    uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt

-# Set SSL environment variables dynamically
-RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
-    echo "Setting SSL_CERT_FILE to $CERTIFI_PATH" && \
-    echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /root/.bashrc && \
-    echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /root/.bashrc
-ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
-ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
+
+# ─── Stage 3: models (pre-baked offline assets) ─────────────────────────────
+FROM deps AS models

 # Pre-download EasyOCR models to avoid runtime SSL issues
-RUN mkdir -p /root/.EasyOCR/model
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true
-RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true
-RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
+RUN mkdir -p /root/.EasyOCR/model && \
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip      -O /root/.EasyOCR/model/english_g2.zip      || true && \
+    wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true && \
+    cd /root/.EasyOCR/model && \
+    (unzip -o english_g2.zip || true) && \
+    (unzip -o craft_mlt_25k.zip || true)

 # Pre-download Docling models
 RUN python -c "try:\n    from docling.document_converter import DocumentConverter\n    conv = DocumentConverter()\nexcept:\n    pass" || true

-# Install Playwright browsers for web scraping (the playwright package itself
-# is already installed via uv.lock above)
+# Install Playwright browsers (the playwright python package itself is in deps)
 RUN playwright install chromium --with-deps

-# Copy source code
-COPY . .
-
-# Install the project itself in editable mode. Dependencies were already
-# installed deterministically from uv.lock above, so --no-deps prevents any
-# re-resolution that could pull newer versions.
-RUN uv pip install --system --no-cache-dir --no-deps -e .
-
-# Copy and set permissions for entrypoint script
-# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
-COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
-RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
-
 # Shared temp directory for file uploads between API and Worker containers.
 # Python's tempfile module uses TMPDIR, so uploaded files land here.
 # Mount the SAME volume at /shared_tmp on both API and Worker in Coolify.
 RUN mkdir -p /shared_tmp
-ENV TMPDIR=/shared_tmp

-# Prevent uvloop compatibility issues
 ENV PYTHONPATH=/app
 ENV UVICORN_LOOP=asyncio
+ENV TMPDIR=/shared_tmp

 # Tune glibc malloc to return freed memory to the OS more aggressively.
 # Without these, Python's gc.collect() frees objects but the underlying
@ -110,6 +109,58 @@ ENV MALLOC_MMAP_THRESHOLD_=65536
 ENV MALLOC_TRIM_THRESHOLD_=131072
 ENV MALLOC_MMAP_MAX_=65536

+
+# ─── Stage 4: e2e (production source + tests/ + e2e entrypoint) ─────────────
+# Built via `docker buildx build --target e2e`. The default build target is
+# `production` (the last stage), so this stage is opt-in for CI only.
+#
+# `tests/` is excluded from the main build context by .dockerignore (so prod
+# can never accidentally ship test fakes). The e2e stage receives tests/
+# through an "additional context" passed by docker-compose.e2e.yml — see
+# https://docs.docker.com/reference/compose-file/build/#additional_contexts
+FROM models AS e2e
+
+# Same source copy as production. .dockerignore filters out tests/.
+COPY . .
+
+# Bring tests/ in via the named additional build context. CI passes
+#   --build-context tests-source=./tests
+# (or the equivalent additional_contexts entry in docker-compose.e2e.yml).
+COPY --from=tests-source . ./tests/
+
+# Install the project itself in editable mode. Dependencies were already
+# installed deterministically from uv.lock above, so --no-deps prevents any
+# re-resolution that could pull newer versions.
+RUN uv pip install --system --no-cache-dir --no-deps -e .
+
+COPY scripts/docker/entrypoint.e2e.sh /app/scripts/docker/entrypoint.e2e.sh
+RUN dos2unix /app/scripts/docker/entrypoint.e2e.sh && chmod +x /app/scripts/docker/entrypoint.e2e.sh
+
+# SERVICE_ROLE is overridden per service in docker-compose.e2e.yml (api / worker).
+ENV SERVICE_ROLE=api
+
+EXPOSE 8000-8001
+CMD ["/app/scripts/docker/entrypoint.e2e.sh"]
+
+
+# ─── Stage 5: production (LAST stage — default `docker build` target) ───────
+# Intended to behave the same as the previous single-stage Dockerfile.
+# .github/workflows/docker-build.yml builds without `target:` and BuildKit
+# defaults to the last stage, so this MUST stay last.
+FROM models AS production
+
+# Copy source code (tests/ excluded by .dockerignore — production never ships tests).
+COPY . .
+
+# Install the project itself in editable mode. Dependencies were already
+# installed deterministically from uv.lock above, so --no-deps prevents any
+# re-resolution that could pull newer versions.
+RUN uv pip install --system --no-cache-dir --no-deps -e .
+
+# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
+COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
+RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
+
 # SERVICE_ROLE controls which process this container runs:
 #   api     – FastAPI backend only (runs migrations on startup)
 #   worker  – Celery worker only
@ -127,6 +178,5 @@ ENV CELERY_MAX_TASKS_PER_CHILD=50
 #   ""                       – both queues (default, for single-worker setups)
 ENV CELERY_QUEUES=""

-# Run
 EXPOSE 8000-8001
-CMD ["/app/scripts/docker/entrypoint.sh"]
+CMD ["/app/scripts/docker/entrypoint.sh"]
--- a/surfsense_backend/scripts/docker/entrypoint.e2e.sh
+++ b/surfsense_backend/scripts/docker/entrypoint.e2e.sh
@ -0,0 +1,52 @@
+#!/bin/bash
+# =============================================================================
+# E2E entrypoint for the multi-stage Dockerfile's `e2e` target.
+#
+# Dispatches on SERVICE_ROLE to the test-only entrypoints under tests/e2e/.
+# Those scripts apply sys.modules hijacks and LLM/embedding patches BEFORE
+# importing production app code (see tests/e2e/run_backend.py for rationale).
+#
+# Production never sees this file: tests/ is excluded from the production
+# stage, and the production stage uses scripts/docker/entrypoint.sh.
+# =============================================================================
+set -euo pipefail
+
+SERVICE_ROLE="${SERVICE_ROLE:-api}"
+echo "[e2e-entrypoint] starting role=${SERVICE_ROLE}"
+
+wait_for_db() {
+    # Block until Postgres accepts a connection and answers `SELECT 1`.
+    #
+    # Compose depends_on/healthchecks already gate on db readiness; this is
+    # belt-and-suspenders so a slow first connection doesn't race migrations.
+    # The probe has to open a real connection: importing the engine and
+    # disposing it never touches the network, so that alone would report
+    # "reachable" even while the database is down.
+    #
+    # Returns 0 as soon as one probe succeeds, 1 after 60 failed attempts.
+    # The last probe error is printed on failure so import errors or bad
+    # credentials are not mistaken for a slow database.
+    local i
+    local probe_err=""
+    local probe_script='import asyncio
+from sqlalchemy import text
+from app.db import engine
+async def probe():
+    try:
+        async with engine.connect() as conn:
+            await conn.execute(text("SELECT 1"))
+    finally:
+        await engine.dispose()
+asyncio.run(probe())'
+    for i in {1..60}; do
+        # Capture stderr only (stdout is discarded); `local` is declared
+        # separately above so the exit status here is python's, not local's.
+        if probe_err="$(python -c "${probe_script}" 2>&1 >/dev/null)"; then
+            echo "[e2e-entrypoint] db reachable after ${i} attempts"
+            return 0
+        fi
+        sleep 1
+    done
+    echo "[e2e-entrypoint] ERROR: db not reachable after 60s" >&2
+    if [ -n "${probe_err}" ]; then
+        echo "[e2e-entrypoint] last probe error: ${probe_err}" >&2
+    fi
+    return 1
+}
+
+# Dispatch on SERVICE_ROLE: `api` waits for the db, runs migrations, then
+# starts the backend; `worker` waits for the db, then starts Celery; any
+# other value is treated as a configuration error and exits non-zero.
+case "${SERVICE_ROLE}" in
+    api)
+        wait_for_db
+        echo "[e2e-entrypoint] running alembic upgrade head"
+        alembic upgrade head
+        # `exec` so SIGTERM from `docker stop` reaches Python directly,
+        # without a shell wrapper interposing.
+        exec python tests/e2e/run_backend.py
+        ;;
+    worker)
+        # Worker doesn't run migrations — the api role does that exactly once.
+        # We still wait for db so the worker doesn't start against an unready
+        # Postgres on cold start. Note the Celery broker itself is Redis
+        # (CELERY_BROKER_URL), not Postgres; this wait does not cover it.
+        wait_for_db
+        exec python tests/e2e/run_celery.py
+        ;;
+    *)
+        echo "[e2e-entrypoint] ERROR: unknown SERVICE_ROLE='${SERVICE_ROLE}' (expected: api | worker)" >&2
+        exit 1
+        ;;
+esac
--- a/surfsense_backend/tests/e2e/run_backend.py
+++ b/surfsense_backend/tests/e2e/run_backend.py
@ -57,6 +57,29 @@ sys.modules["notion_client.errors"] = _fake_notion.errors
 from dotenv import load_dotenv  # noqa: E402

 load_dotenv()
+
+os.environ.setdefault(
+    "DATABASE_URL",
+    "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
+)
+os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
+os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
+os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
+os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
+os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
+os.environ.setdefault("AUTH_TYPE", "LOCAL")
+os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
+os.environ.setdefault("ETL_SERVICE", "DOCLING")
+os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
+
+# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
+os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
+os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
+
 os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
 os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
 os.environ.setdefault(
--- a/surfsense_backend/tests/e2e/run_celery.py
+++ b/surfsense_backend/tests/e2e/run_celery.py
@ -44,6 +44,29 @@ sys.modules["notion_client.errors"] = _fake_notion.errors
 from dotenv import load_dotenv  # noqa: E402

 load_dotenv()
+
+os.environ.setdefault(
+    "DATABASE_URL",
+    "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
+)
+os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
+os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
+os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
+os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
+os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
+os.environ.setdefault("AUTH_TYPE", "LOCAL")
+os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
+os.environ.setdefault("ETL_SERVICE", "DOCLING")
+os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
+
+# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
+os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
+os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
+os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
+
 os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
 os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
 os.environ.setdefault(
@ -198,12 +221,19 @@ def _main() -> None:
    # so Drive indexing tasks are picked up).
    queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
    queues = f"{queue_name},{queue_name}.connectors"
+
+    # On macOS, forking after MPS initialization crashes prefork workers; the threads pool avoids it.
+    default_pool = "threads" if sys.platform == "darwin" else "prefork"
+    pool = os.getenv("CELERY_POOL", default_pool)
+    concurrency = os.getenv("CELERY_CONCURRENCY", "2")
+
    celery_app.worker_main(
        argv=[
            "worker",
            "--loglevel=info",
            f"--queues={queues}",
-            "--concurrency=2",
+            f"--pool={pool}",
+            f"--concurrency={concurrency}",
            "--without-gossip",
            "--without-mingle",
        ]
--- a/surfsense_web/playwright.config.ts
+++ b/surfsense_web/playwright.config.ts
@ -4,6 +4,11 @@ const PORT = process.env.PORT || "3000";
 const BACKEND_PORT = process.env.BACKEND_PORT || "8000";
 const baseURL = process.env.PLAYWRIGHT_BASE_URL || `http://localhost:${PORT}`;

+process.env.PLAYWRIGHT_TEST_EMAIL ??= "e2e-test@surfsense.net";
+process.env.PLAYWRIGHT_TEST_PASSWORD ??= "E2eTestPassword123!";
+process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ??= `http://localhost:${BACKEND_PORT}`;
+process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE ??= "LOCAL";
+
 /**
 * Playwright configuration for SurfSense web E2E tests.
 *
@ -60,9 +65,13 @@ export default defineConfig({
 				url: `http://localhost:${PORT}`,
 				reuseExistingServer: !process.env.CI,
 				timeout: process.env.CI ? 300_000 : 180_000,
+				stdout: "pipe",
+				stderr: "pipe",
 				env: {
-					NEXT_PUBLIC_FASTAPI_BACKEND_URL: `http://localhost:${BACKEND_PORT}`,
-					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: "LOCAL",
+					NEXT_PUBLIC_FASTAPI_BACKEND_URL:
+						process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL,
+					NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE:
+						process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE,
 				},
 			},
 });
--- a/surfsense_web/tests/auth.setup.ts
+++ b/surfsense_web/tests/auth.setup.ts
@ -10,15 +10,14 @@ import { expect, test as setup } from "@playwright/test";
 *   POST /auth/jwt/login  ->  { access_token }
 *   localStorage.setItem("surfsense_bearer_token", access_token)
 *
- * Requires a seeded test user in the dev/test DB. Configure via env:
- *   PLAYWRIGHT_TEST_EMAIL, PLAYWRIGHT_TEST_PASSWORD
- *   NEXT_PUBLIC_FASTAPI_BACKEND_URL  (defaults to http://localhost:8000)
+ * Requires a seeded test user in the dev/test DB. Credentials and backend URL
+ * default to the docker/docker-compose.e2e.yml local stack; override them via
+ * PLAYWRIGHT_TEST_EMAIL, PLAYWRIGHT_TEST_PASSWORD, NEXT_PUBLIC_FASTAPI_BACKEND_URL.
 */

 const authFile = path.join(__dirname, "..", "playwright", ".auth", "user.json");

-const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
-const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
+const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "e2e-test@surfsense.net";
+const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "E2eTestPassword123!";
 const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
 const STORAGE_KEY = "surfsense_bearer_token";

--- a/surfsense_web/tests/helpers/api/auth.ts
+++ b/surfsense_web/tests/helpers/api/auth.ts
@ -11,8 +11,8 @@ import type { APIRequestContext } from "@playwright/test";

 export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";

-const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net";
-const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!";
+const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "e2e-test@surfsense.net";
+const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "E2eTestPassword123!";

 export async function loginAsTestUser(request: APIRequestContext): Promise<string> {
 	const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, {