diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 8de55ba91..224591d1f 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -31,7 +31,7 @@ jobs: new_tag: ${{ steps.tag_version.outputs.next_version }} steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 ref: ${{ github.event.inputs.branch }} @@ -108,16 +108,18 @@ jobs: name: surfsense-backend context: ./surfsense_backend file: ./surfsense_backend/Dockerfile + target: production - image: web name: surfsense-web context: ./surfsense_web file: ./surfsense_web/Dockerfile + target: runner env: REGISTRY_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ matrix.name }} steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set lowercase image name id: image @@ -125,19 +127,19 @@ jobs: - name: Docker meta id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v6 with: images: ${{ steps.image.outputs.name }} - name: Login to GitHub Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Free up disk space run: | @@ -149,10 +151,11 @@ jobs: - name: Build and push by digest ${{ matrix.name }} (${{ matrix.suffix }}) id: build - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v7 with: context: ${{ matrix.context }} file: ${{ matrix.file }} + target: ${{ matrix.target }} labels: ${{ steps.meta.outputs.labels }} tags: ${{ steps.image.outputs.name }} outputs: type=image,push-by-digest=true,name-canonical=true,push=true @@ -174,7 +177,7 @@ jobs: touch "/tmp/digests/${digest#sha256:}" - name: Upload digest - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: digests-${{ matrix.image }}-${{ matrix.suffix }} path: /tmp/digests/* @@ -205,22 +208,22 @@ jobs: run: echo "name=${REGISTRY_IMAGE,,}" >> $GITHUB_OUTPUT - name: Download amd64 digest - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: digests-${{ matrix.image }}-amd64 path: /tmp/digests - name: Download arm64 digest - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: name: digests-${{ matrix.image }}-arm64 path: /tmp/digests - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Login to GitHub Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.repository_owner }} @@ -239,7 +242,7 @@ jobs: - name: Docker meta id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v6 with: images: ${{ steps.image.outputs.name }} tags: | diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml new file mode 100644 index 000000000..b87537dab --- /dev/null +++ b/.github/workflows/e2e-tests.yml @@ -0,0 +1,174 @@ +name: E2E Tests + +on: + pull_request: + branches: [main, dev] + types: [opened, synchronize, reopened, ready_for_review] + paths: + - 'surfsense_web/**' + - 'surfsense_backend/**' + - 'docker/docker-compose.e2e.yml' + - '.github/workflows/e2e-tests.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + e2e: + name: Journey + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + timeout-minutes: 30 + + env: + # Test user that the backend creates via /auth/register before Playwright runs. + PLAYWRIGHT_TEST_EMAIL: e2e-test@surfsense.net + PLAYWRIGHT_TEST_PASSWORD: E2eTestPassword123! + # Frontend env: Playwright's webServer (surfsense_web/playwright.config.ts) + # spawns `pnpm build && pnpm start` in CI; these get baked into the build. + NEXT_PUBLIC_FASTAPI_BACKEND_URL: http://localhost:8000 + NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: LOCAL + # Shared secret for the test-only POST /__e2e__/auth/token endpoint. + # Must match docker-compose.e2e.yml's backend env (x-backend-env). + E2E_MINT_SECRET: e2e-mint-secret-not-for-production + + steps: + - uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + # ─── Backend stack ───────────────────────────────────────────────── + # Builds the e2e image (multi-stage, deps cached via GHA), brings up + # db + redis + backend + celery_worker, blocks until every healthcheck + # is green. No `uv` invocation on the runner; no PID files; no curl + # polling loops; readiness is gated by Docker healthchecks. + - name: Build & start backend stack + run: | + docker compose -f docker/docker-compose.e2e.yml \ + up -d --build --wait --wait-timeout 300 + + - name: Show backend stack status + if: always() + run: docker compose -f docker/docker-compose.e2e.yml ps + + - name: Register E2E test user + run: | + # 200/201 = created, 400 = already exists (idempotent across reruns). + STATUS=$(curl -s -o /tmp/register.json -w "%{http_code}" \ + -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d "{\"email\":\"${PLAYWRIGHT_TEST_EMAIL}\",\"password\":\"${PLAYWRIGHT_TEST_PASSWORD}\"}") + echo "Register status: ${STATUS}" + cat /tmp/register.json + if [ "${STATUS}" != "200" ] && [ "${STATUS}" != "201" ] && [ "${STATUS}" != "400" ]; then + echo "::error::Failed to register test user (status ${STATUS})" + exit 1 + fi + + # Flush auth rate-limit counters so Playwright starts clean. + docker compose -f docker/docker-compose.e2e.yml exec -T redis \ + sh -c "redis-cli --scan --pattern 'surfsense:auth_rate_limit:*' \ + | xargs -r redis-cli DEL" || true + + # ─── Frontend (host-side) ────────────────────────────────────────── + # Playwright's webServer block in playwright.config.ts spawns + # `pnpm build && pnpm start` in CI mode and waits for :3000. + - uses: actions/setup-node@v6 + with: + node-version: '20' + + - uses: pnpm/action-setup@v6 + + - name: Get pnpm store directory + id: pnpm-cache + shell: bash + run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_OUTPUT + + - name: Cache pnpm store + uses: actions/cache@v5 + with: + path: ${{ steps.pnpm-cache.outputs.STORE_PATH }} + key: pnpm-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }} + restore-keys: pnpm-${{ runner.os }}- + + - name: Install web dependencies + working-directory: surfsense_web + run: pnpm install --frozen-lockfile + + - name: Cache Playwright browsers + id: playwright-cache + uses: actions/cache@v5 + with: + path: ~/.cache/ms-playwright + key: playwright-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }} + + - name: Install Playwright browsers + if: steps.playwright-cache.outputs.cache-hit != 'true' + working-directory: surfsense_web + run: pnpm exec playwright install --with-deps chromium + + - name: Install Playwright system deps (cache hit) + if: steps.playwright-cache.outputs.cache-hit == 'true' + working-directory: surfsense_web + run: pnpm exec playwright install-deps chromium + + - name: Cache Next.js build + uses: actions/cache@v5 + with: + path: surfsense_web/.next/cache + key: nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}-${{ github.sha }} + restore-keys: | + nextjs-${{ runner.os }}-${{ hashFiles('surfsense_web/pnpm-lock.yaml') }}- + nextjs-${{ runner.os }}- + + # ─── Tests ───────────────────────────────────────────────────────── + - name: Run Playwright tests + working-directory: surfsense_web + run: pnpm test:e2e:prod + + # ─── Failure diagnostics ─────────────────────────────────────────── + - name: Dump backend stack logs on failure + if: ${{ failure() || cancelled() }} + run: | + mkdir -p ./compose-logs + docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps \ + > ./compose-logs/all-services.log 2>&1 || true + for svc in db redis backend celery_worker; do + docker compose -f docker/docker-compose.e2e.yml logs --no-color --timestamps "$svc" \ + > "./compose-logs/${svc}.log" 2>&1 || true + done + docker compose -f docker/docker-compose.e2e.yml ps \ + > ./compose-logs/ps.txt 2>&1 || true + + # ─── Artifacts ───────────────────────────────────────────────────── + - name: Upload Playwright HTML report + if: always() + uses: actions/upload-artifact@v7 + with: + name: playwright-report + path: surfsense_web/playwright-report/ + retention-days: 14 + + - name: Upload Playwright traces + if: failure() + uses: actions/upload-artifact@v7 + with: + name: playwright-traces + path: surfsense_web/test-results/ + retention-days: 14 + + - name: Upload backend stack logs + if: ${{ failure() || cancelled() }} + uses: actions/upload-artifact@v7 + with: + name: backend-stack-logs + path: ./compose-logs/ + retention-days: 7 + + # ─── Teardown ────────────────────────────────────────────────────── + - name: Tear down backend stack + if: always() + run: docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans diff --git a/docker/.env.example b/docker/.env.example index aba15f13f..4de35a5e9 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -4,7 +4,7 @@ # Database, Redis, and internal service wiring are handled automatically. # ============================================================================== -# SurfSense version (use "latest", a clean version like "0.0.14", or a specific build like "0.0.14.1") +# SurfSense version (use "latest" or a specific version like "0.0.14") SURFSENSE_VERSION=latest # ------------------------------------------------------------------------------ diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index bbe758d4f..28b00a044 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -10,6 +10,11 @@ name: surfsense-dev +x-backend-build: &backend-build + context: ../surfsense_backend + args: + EMBEDDING_MODEL: ${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + services: db: image: pgvector/pgvector:pg17 @@ -69,7 +74,7 @@ services: retries: 5 backend: - build: ../surfsense_backend + build: *backend-build ports: - "${BACKEND_PORT:-8000}:8000" volumes: @@ -114,7 +119,7 @@ services: start_period: 200s celery_worker: - build: ../surfsense_backend + build: *backend-build volumes: - ../surfsense_backend/app:/app/app - shared_temp:/shared_tmp @@ -140,7 +145,7 @@ services: condition: service_healthy celery_beat: - build: ../surfsense_backend + build: *backend-build env_file: - ../surfsense_backend/.env environment: @@ -159,7 +164,7 @@ services: condition: service_started # flower: - # build: ../surfsense_backend + # build: *backend-build # ports: # - "${FLOWER_PORT:-5555}:5555" # env_file: diff --git a/docker/docker-compose.e2e.yml b/docker/docker-compose.e2e.yml new file mode 100644 index 000000000..2d55595f7 --- /dev/null +++ b/docker/docker-compose.e2e.yml @@ -0,0 +1,181 @@ +# ============================================================================= +# SurfSense — E2E Docker Compose stack +# ============================================================================= +# Hermetic backend stack for Playwright E2E tests: +# - db / redis on an internal-only network (no internet egress) +# - backend (FastAPI) joins the internal network AND a separate ingress +# bridge so the host runner can reach :8000 +# - celery_worker on the internal network only — zero egress surface +# +# The backend image is built from surfsense_backend/Dockerfile target=e2e, +# which adds tests/ via the `tests-source` additional context (tests/ is +# excluded from the main context by .dockerignore so production never ships +# test fakes). See surfsense_backend/Dockerfile for stage layout. +# +# Usage from repo root: +# docker compose -f docker/docker-compose.e2e.yml up -d --build --wait +# curl -X POST http://localhost:8000/auth/register ... +# ( run Playwright on host, pointing at localhost:8000 + localhost:3000 ) +# docker compose -f docker/docker-compose.e2e.yml down -v +# ============================================================================= + +name: surfsense-e2e + +x-backend-env: &backend-env + DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/surfsense_e2e + CELERY_BROKER_URL: redis://redis:6379/0 + CELERY_RESULT_BACKEND: redis://redis:6379/0 + REDIS_APP_URL: redis://redis:6379/0 + CELERY_TASK_DEFAULT_QUEUE: surfsense + SECRET_KEY: ci-test-secret-key-not-for-production + AUTH_TYPE: LOCAL + REGISTRATION_ENABLED: "TRUE" + ETL_SERVICE: DOCLING + EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 + NEXT_FRONTEND_URL: http://host.docker.internal:3000 + # Sentinel keys — fakes never read them; turns leaked real calls into 401s. + COMPOSIO_API_KEY: e2e-deny-real-call-sentinel + COMPOSIO_ENABLED: "TRUE" + OPENAI_API_KEY: e2e-deny-real-call-sentinel + ANTHROPIC_API_KEY: e2e-deny-real-call-sentinel + LITELLM_API_KEY: e2e-deny-real-call-sentinel + MICROSOFT_CLIENT_ID: fake-microsoft-client-id + MICROSOFT_CLIENT_SECRET: fake-microsoft-client-secret + ONEDRIVE_REDIRECT_URI: http://localhost:8000/api/v1/auth/onedrive/connector/callback + DROPBOX_APP_KEY: fake-dropbox-app-key + DROPBOX_APP_SECRET: fake-dropbox-app-secret + DROPBOX_REDIRECT_URI: http://localhost:8000/api/v1/auth/dropbox/connector/callback + # Defense-in-depth: even though L3 egress is denied for the worker via + # `internal: true`, the backend still has a route via `ingress`. Setting + # HTTPS_PROXY to an unreachable port turns any leaked Python outbound HTTP + # call into a fast Connection refused. UNLIKE the old runner-shell setup, + # this proxy is set on the container env and `uv` is never invoked here, + # so there is no interaction with uv's implicit-sync behaviour. + HTTPS_PROXY: http://127.0.0.1:1 + HTTP_PROXY: http://127.0.0.1:1 + NO_PROXY: localhost,127.0.0.1,0.0.0.0,db,redis,host.docker.internal + HF_HUB_OFFLINE: "1" + TRANSFORMERS_OFFLINE: "1" + # Test-only token-mint endpoint secret (see tests/e2e/run_backend.py). + E2E_MINT_SECRET: e2e-mint-secret-not-for-production + +services: + db: + image: pgvector/pgvector:pg17 + command: > + postgres + -c wal_level=logical + -c max_wal_senders=10 + -c max_replication_slots=10 + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: surfsense_e2e + # Ephemeral storage — every CI run gets a clean DB, no volume cleanup needed. + tmpfs: + - /var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d surfsense_e2e"] + interval: 2s + timeout: 3s + retries: 30 + networks: [internal] + + redis: + image: redis:8-alpine + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 2s + timeout: 3s + retries: 30 + networks: [internal] + + backend: + build: + context: ../surfsense_backend + dockerfile: Dockerfile + target: e2e + additional_contexts: + # tests/ is excluded from the main context by .dockerignore; + # the e2e stage's `COPY --from=tests-source` pulls it in here. + tests-source: ../surfsense_backend/tests + args: + EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 + cache_from: + - type=gha,scope=surfsense-e2e-backend + cache_to: + - type=gha,mode=max,scope=surfsense-e2e-backend + image: surfsense-e2e-backend:local + environment: + <<: *backend-env + SERVICE_ROLE: api + volumes: + - shared_temp:/shared_tmp + extra_hosts: + - "host.docker.internal:host-gateway" + ports: + - "8000:8000" + depends_on: + db: { condition: service_healthy } + redis: { condition: service_healthy } + healthcheck: + # Use Python (already in the image) instead of curl/wget to avoid + # depending on either tool being installed in the runtime layers. + test: + - CMD + - python + - -c + - | + import sys, urllib.request + try: + r = urllib.request.urlopen("http://localhost:8000/openapi.json", timeout=2) + sys.exit(0 if r.status == 200 else 1) + except Exception: + sys.exit(1) + interval: 3s + timeout: 5s + retries: 60 + start_period: 30s + networks: + - internal # to reach db/redis + - ingress # so host can reach :8000 + + celery_worker: + image: surfsense-e2e-backend:local + pull_policy: never + # No build: section — reuses the image built by the `backend` service. + # Compose v2 builds shared images exactly once across services that + # reference the same `image:` tag. + environment: + <<: *backend-env + SERVICE_ROLE: worker + volumes: + - shared_temp:/shared_tmp + depends_on: + backend: { condition: service_healthy } + healthcheck: + test: + - CMD-SHELL + - "celery -A app.celery_app inspect ping --timeout 2 | grep -q pong" + interval: 5s + timeout: 5s + retries: 12 + start_period: 20s + networks: [internal] + +networks: + # Internal network: containers attached only to this network have NO route + # to the host or the internet. This is the L3 deny-egress mechanism that + # replaces the fragile HTTPS_PROXY-on-the-runner approach. + internal: + driver: bridge + internal: true + + # Regular bridge network. Only the `backend` service joins it, solely so + # the host can reach :8000 via the published port. celery_worker / db / + # redis stay off this network entirely. + ingress: + driver: bridge + +volumes: + shared_temp: diff --git a/package.json b/package.json index 8a1a6add8..1e45c1706 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { "name": "surfsense", "private": true, - "packageManager": "pnpm@10.24.0" + "packageManager": "pnpm@10.26.0" } diff --git a/surfsense_backend/.gitignore b/surfsense_backend/.gitignore index 1cd7fd32c..47fd53aef 100644 --- a/surfsense_backend/.gitignore +++ b/surfsense_backend/.gitignore @@ -13,5 +13,5 @@ celerybeat-schedule* celerybeat-schedule.* celerybeat-schedule.dir celerybeat-schedule.bak -global_llm_config.yaml +/app/config/global_llm_config.yaml app/templates/_generated/ \ No newline at end of file diff --git a/surfsense_backend/Dockerfile b/surfsense_backend/Dockerfile index 73d5819b9..6e1b2481e 100644 --- a/surfsense_backend/Dockerfile +++ b/surfsense_backend/Dockerfile @@ -1,8 +1,16 @@ -FROM python:3.12-slim +# ============================================================================= +# SurfSense Backend — Multi-stage Dockerfile +# ============================================================================= +# Graph: base → deps → models → {e2e, production} +# e2e — tests/ via additional_contexts (docker-compose.e2e.yml) +# production — published ghcr.io image (docker-build.yml pins target) +# ============================================================================= + +# ─── Stage 1: base (system deps, Pandoc, certificates) ────────────────────── +FROM python:3.12-slim AS base WORKDIR /app -# Install system dependencies including SSL tools, CUDA dependencies, and Tesseract OCR RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ python3-dev \ @@ -11,6 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ unzip \ gnupg2 \ + ffmpeg \ espeak-ng \ libsndfile1 \ libgl1 \ @@ -22,21 +31,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ && rm -rf /var/lib/apt/lists/* -# Install Pandoc 3.x from GitHub as a fallback for Linux where pypandoc_binary -# may not bundle pandoc (apt ships 2.17 which has broken table rendering). -# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks this up. +RUN which ffmpeg && ffmpeg -version + +# Pandoc 3.x from GitHub Releases — apt ships 2.17 which has broken table rendering. +# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks up this binary. RUN ARCH=$(dpkg --print-architecture) && \ wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \ dpkg -i /tmp/pandoc.deb && \ rm /tmp/pandoc.deb -# Update certificates and install SSL tools RUN update-ca-certificates RUN pip install --upgrade certifi pip-system-certs -# Copy requirements -COPY pyproject.toml . -COPY uv.lock . +ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem +ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem +ENV SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD=FALSE + + +# ─── Stage 2: deps (Python deps frozen from uv.lock) ──────────────────────── +FROM base AS deps + +COPY pyproject.toml uv.lock ./ # Install all Python dependencies from uv.lock for deterministic builds. # @@ -49,9 +64,7 @@ COPY uv.lock . # Note on torch/CUDA: we do NOT install torch from a separate cu* index here. # PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull # nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all -# captured in uv.lock). Installing from cu121 first only wasted ~2GB of -# downloads that the lock-based install immediately replaced. If a specific -# CUDA version is needed (driver compatibility, etc.), wire it through +# captured in uv.lock). If a specific CUDA version is needed, wire it through # [tool.uv.sources] in pyproject.toml so the lock stays the source of truth. RUN pip install --no-cache-dir uv && \ uv export --frozen --no-dev --no-hashes --no-emit-project \ @@ -59,49 +72,42 @@ RUN pip install --no-cache-dir uv && \ uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \ rm /tmp/requirements.txt -# Set SSL environment variables dynamically -RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \ - echo "Setting SSL_CERT_FILE to $CERTIFI_PATH" && \ - echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /root/.bashrc && \ - echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /root/.bashrc -ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem -ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem + +# ─── Stage 3: models (pre-baked offline assets) ───────────────────────────── +FROM deps AS models # Pre-download EasyOCR models to avoid runtime SSL issues -RUN mkdir -p /root/.EasyOCR/model -RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true -RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true -RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true) +RUN mkdir -p /root/.EasyOCR/model && \ + wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true && \ + wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true && \ + cd /root/.EasyOCR/model && \ + (unzip -o english_g2.zip || true) && \ + (unzip -o craft_mlt_25k.zip || true) # Pre-download Docling models -RUN python -c "try:\n from docling.document_converter import DocumentConverter\n conv = DocumentConverter()\nexcept:\n pass" || true +RUN printf '%s\n' \ + 'try:' \ + ' from docling.document_converter import DocumentConverter' \ + ' DocumentConverter()' \ + 'except Exception:' \ + ' pass' \ + | python || true -# Install Playwright browsers for web scraping (the playwright package itself -# is already installed via uv.lock above) +ARG EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +RUN python -c "from chonkie import AutoEmbeddings; AutoEmbeddings.get_embeddings('${EMBEDDING_MODEL}')" + +# Install Playwright browsers (the playwright python package itself is in deps) RUN playwright install chromium --with-deps -# Copy source code -COPY . . - -# Install the project itself in editable mode. Dependencies were already -# installed deterministically from uv.lock above, so --no-deps prevents any -# re-resolution that could pull newer versions. -RUN uv pip install --system --no-cache-dir --no-deps -e . - -# Copy and set permissions for entrypoint script -# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts) -COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh -RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh - # Shared temp directory for file uploads between API and Worker containers. # Python's tempfile module uses TMPDIR, so uploaded files land here. # Mount the SAME volume at /shared_tmp on both API and Worker in Coolify. RUN mkdir -p /shared_tmp -ENV TMPDIR=/shared_tmp -# Prevent uvloop compatibility issues ENV PYTHONPATH=/app ENV UVICORN_LOOP=asyncio +ENV TMPDIR=/shared_tmp +ENV PYTHONUNBUFFERED=1 # Tune glibc malloc to return freed memory to the OS more aggressively. # Without these, Python's gc.collect() frees objects but the underlying @@ -110,6 +116,56 @@ ENV MALLOC_MMAP_THRESHOLD_=65536 ENV MALLOC_TRIM_THRESHOLD_=131072 ENV MALLOC_MMAP_MAX_=65536 + +# ─── Stage 4: e2e (production source + tests/ + e2e entrypoint) ───────────── +# Built via `docker buildx build --target e2e`. The default build target is +# `production` (the last stage), so this stage is opt-in for CI only. +# +# `tests/` is excluded from the main build context by .dockerignore (so prod +# can never accidentally ship test fakes). The e2e stage receives tests/ +# through an "additional context" passed by docker-compose.e2e.yml — see +# https://docs.docker.com/reference/compose-file/build/#additional_contexts +FROM models AS e2e + +# Same source copy as production. .dockerignore filters out tests/. +COPY . . + +# Bring tests/ in via the named additional build context. CI passes +# --build-context tests-source=./tests +# (or the equivalent additional_contexts entry in docker-compose.e2e.yml). +COPY --from=tests-source . ./tests/ + +# Install the project itself in editable mode. Dependencies were already +# installed deterministically from uv.lock above, so --no-deps prevents any +# re-resolution that could pull newer versions. +RUN uv pip install --system --no-cache-dir --no-deps -e . + +COPY scripts/docker/entrypoint.e2e.sh /app/scripts/docker/entrypoint.e2e.sh +RUN dos2unix /app/scripts/docker/entrypoint.e2e.sh && chmod +x /app/scripts/docker/entrypoint.e2e.sh + +# SERVICE_ROLE is overridden per service in docker-compose.e2e.yml (api / worker). +ENV SERVICE_ROLE=api + +EXPOSE 8000-8001 +CMD ["/app/scripts/docker/entrypoint.e2e.sh"] + + +# ─── Stage 5: production (published ghcr.io image) ────────────────────────── +# CI pins `target: production`; also the default for `docker build` / dev compose. +FROM models AS production + +# Copy source code (tests/ excluded by .dockerignore — production never ships tests). +COPY . . + +# Install the project itself in editable mode. Dependencies were already +# installed deterministically from uv.lock above, so --no-deps prevents any +# re-resolution that could pull newer versions. +RUN uv pip install --system --no-cache-dir --no-deps -e . + +# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts) +COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh +RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh + # SERVICE_ROLE controls which process this container runs: # api – FastAPI backend only (runs migrations on startup) # worker – Celery worker only @@ -127,6 +183,5 @@ ENV CELERY_MAX_TASKS_PER_CHILD=50 # "" – both queues (default, for single-worker setups) ENV CELERY_QUEUES="" -# Run EXPOSE 8000-8001 -CMD ["/app/scripts/docker/entrypoint.sh"] \ No newline at end of file +CMD ["/app/scripts/docker/entrypoint.sh"] diff --git a/surfsense_backend/alembic/env.py b/surfsense_backend/alembic/env.py index bd8c20356..5354211aa 100644 --- a/surfsense_backend/alembic/env.py +++ b/surfsense_backend/alembic/env.py @@ -67,7 +67,11 @@ def run_migrations_offline() -> None: def do_run_migrations(connection: Connection) -> None: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, + target_metadata=target_metadata, + transaction_per_migration=True, + ) with context.begin_transaction(): context.run_migrations() diff --git a/surfsense_backend/alembic/versions/130_add_agent_action_log.py b/surfsense_backend/alembic/versions/130_add_agent_action_log.py index f86a8a3b5..5978848d0 100644 --- a/surfsense_backend/alembic/versions/130_add_agent_action_log.py +++ b/surfsense_backend/alembic/versions/130_add_agent_action_log.py @@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: + bind = op.get_bind() + if sa.inspect(bind).has_table("agent_action_log"): + return + op.create_table( "agent_action_log", sa.Column("id", sa.Integer(), primary_key=True, index=True), diff --git a/surfsense_backend/alembic/versions/131_add_document_revisions.py b/surfsense_backend/alembic/versions/131_add_document_revisions.py index 95ce0e032..c1e9b6068 100644 --- a/surfsense_backend/alembic/versions/131_add_document_revisions.py +++ b/surfsense_backend/alembic/versions/131_add_document_revisions.py @@ -29,6 +29,21 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: + bind = op.get_bind() + inspector = sa.inspect(bind) + + if inspector.has_table("document_revisions") and inspector.has_table( + "folder_revisions" + ): + return + + if not inspector.has_table("document_revisions"): + _create_document_revisions() + if not inspector.has_table("folder_revisions"): + _create_folder_revisions() + + +def _create_document_revisions() -> None: op.create_table( "document_revisions", sa.Column("id", sa.Integer(), primary_key=True, index=True), @@ -74,6 +89,8 @@ def upgrade() -> None: ), ) + +def _create_folder_revisions() -> None: op.create_table( "folder_revisions", sa.Column("id", sa.Integer(), primary_key=True, index=True), diff --git a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py index ff5b52e18..1ee3cd2f0 100644 --- a/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py +++ b/surfsense_backend/alembic/versions/132_add_agent_permission_rules.py @@ -26,6 +26,10 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: + bind = op.get_bind() + if sa.inspect(bind).has_table("agent_permission_rules"): + return + op.create_table( "agent_permission_rules", sa.Column("id", sa.Integer(), primary_key=True, index=True), diff --git a/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py b/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py index 9ae368b81..e40c4fb26 100644 --- a/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py +++ b/surfsense_backend/alembic/versions/135_action_log_correlation_ids.py @@ -50,29 +50,39 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: - op.add_column( - "agent_action_log", - sa.Column("tool_call_id", sa.String(length=64), nullable=True), - ) - op.add_column( - "agent_action_log", - sa.Column("chat_turn_id", sa.String(length=64), nullable=True), - ) + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = {c["name"] for c in inspector.get_columns("agent_action_log")} + indexes = {i["name"] for i in inspector.get_indexes("agent_action_log")} - op.create_index( - "ix_agent_action_log_tool_call_id", - "agent_action_log", - ["tool_call_id"], - ) - op.create_index( - "ix_agent_action_log_chat_turn_id", - "agent_action_log", - ["chat_turn_id"], - ) + if "tool_call_id" not in columns: + op.add_column( + "agent_action_log", + sa.Column("tool_call_id", sa.String(length=64), nullable=True), + ) + if "chat_turn_id" not in columns: + op.add_column( + "agent_action_log", + sa.Column("chat_turn_id", sa.String(length=64), nullable=True), + ) - op.execute( - "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL" - ) + if "ix_agent_action_log_tool_call_id" not in indexes: + op.create_index( + "ix_agent_action_log_tool_call_id", + "agent_action_log", + ["tool_call_id"], + ) + if "ix_agent_action_log_chat_turn_id" not in indexes: + op.create_index( + "ix_agent_action_log_chat_turn_id", + "agent_action_log", + ["chat_turn_id"], + ) + + if "turn_id" in columns: + op.execute( + "UPDATE agent_action_log SET tool_call_id = turn_id WHERE tool_call_id IS NULL" + ) def downgrade() -> None: diff --git a/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py b/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py index 8d4350424..ee02e453c 100644 --- a/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py +++ b/surfsense_backend/alembic/versions/136_new_chat_message_turn_id.py @@ -36,15 +36,22 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: - op.add_column( - "new_chat_messages", - sa.Column("turn_id", sa.String(length=64), nullable=True), - ) - op.create_index( - "ix_new_chat_messages_turn_id", - "new_chat_messages", - ["turn_id"], - ) + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = {c["name"] for c in inspector.get_columns("new_chat_messages")} + indexes = {i["name"] for i in inspector.get_indexes("new_chat_messages")} + + if "turn_id" not in columns: + op.add_column( + "new_chat_messages", + sa.Column("turn_id", sa.String(length=64), nullable=True), + ) + if "ix_new_chat_messages_turn_id" not in indexes: + op.create_index( + "ix_new_chat_messages_turn_id", + "new_chat_messages", + ["turn_id"], + ) def downgrade() -> None: diff --git a/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py b/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py index d606a00f9..47421e712 100644 --- a/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py +++ b/surfsense_backend/alembic/versions/137_unique_reverse_of_in_action_log.py @@ -27,6 +27,8 @@ from __future__ import annotations from collections.abc import Sequence +import sqlalchemy as sa + from alembic import op revision: str = "137" @@ -39,6 +41,11 @@ _INDEX_NAME = "ux_agent_action_log_reverse_of" def upgrade() -> None: + bind = op.get_bind() + indexes = {i["name"] for i in sa.inspect(bind).get_indexes("agent_action_log")} + if _INDEX_NAME in indexes: + return + # Defensively de-dup any pre-existing double-revert rows before # adding the unique index. Keeps the OLDEST row (smallest id) and # NULLs out the duplicates' ``reverse_of`` so they survive as audit diff --git a/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py b/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py index 9a27e7ed0..1226a59b4 100644 --- a/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py +++ b/surfsense_backend/alembic/versions/141_unique_chat_message_turn_role.py @@ -53,6 +53,11 @@ TABLE_NAME = "new_chat_messages" def upgrade() -> None: + bind = op.get_bind() + indexes = {i["name"] for i in sa.inspect(bind).get_indexes(TABLE_NAME)} + if INDEX_NAME in indexes: + return + op.create_index( INDEX_NAME, TABLE_NAME, diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index f6f0c7f62..448818e88 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -473,10 +473,16 @@ def initialize_vision_llm_router(): class Config: # Check if ffmpeg is installed if not is_ffmpeg_installed(): - import static_ffmpeg + allow_static_ffmpeg = ( + os.getenv("SURFSENSE_ALLOW_STATIC_FFMPEG_DOWNLOAD", "TRUE").upper() + == "TRUE" + ) + if allow_static_ffmpeg: + import static_ffmpeg + + # ffmpeg installed on first call to add_paths(), threadsafe. + static_ffmpeg.add_paths() - # ffmpeg installed on first call to add_paths(), threadsafe. - static_ffmpeg.add_paths() # check if ffmpeg is installed again if not is_ffmpeg_installed(): raise ValueError( diff --git a/surfsense_backend/scripts/docker/entrypoint.e2e.sh b/surfsense_backend/scripts/docker/entrypoint.e2e.sh new file mode 100755 index 000000000..b44e1ee95 --- /dev/null +++ b/surfsense_backend/scripts/docker/entrypoint.e2e.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# ============================================================================= +# E2E entrypoint for the multi-stage Dockerfile's `e2e` target. +# +# Dispatches on SERVICE_ROLE to the test-only entrypoints under tests/e2e/. +# Those scripts apply sys.modules hijacks and LLM/embedding patches BEFORE +# importing production app code (see tests/e2e/run_backend.py for rationale). +# +# Production never sees this file: tests/ is excluded from the production +# stage, and the production stage uses scripts/docker/entrypoint.sh. +# ============================================================================= +set -euo pipefail + +SERVICE_ROLE="${SERVICE_ROLE:-api}" +echo "[e2e-entrypoint] starting role=${SERVICE_ROLE}" + +wait_for_db() { + # Block until the database is reachable. We don't loop forever — Compose + # depends_on/healthchecks already gate on db readiness, this is just + # belt-and-suspenders so a slow first connection doesn't race migrations. + for i in {1..60}; do + echo "[e2e-entrypoint] db check attempt ${i}/60" + if python -c "from app.db import engine; import asyncio; asyncio.run(engine.dispose())"; then + echo "[e2e-entrypoint] db reachable after ${i} attempts" + return 0 + fi + sleep 1 + done + echo "[e2e-entrypoint] ERROR: db not reachable after 60s" >&2 + return 1 +} + +case "${SERVICE_ROLE}" in + api) + wait_for_db + echo "[e2e-entrypoint] running alembic upgrade head" + alembic upgrade head + # `exec` so SIGTERM from `docker stop` reaches Python directly, + # without a shell wrapper interposing. + exec python tests/e2e/run_backend.py + ;; + worker) + # Worker doesn't run migrations — the api role does that exactly once. + # We still wait for db so Celery's broker connection check doesn't + # race against an unready Postgres on cold start. + wait_for_db + exec python tests/e2e/run_celery.py + ;; + *) + echo "[e2e-entrypoint] ERROR: unknown SERVICE_ROLE='${SERVICE_ROLE}' (expected: api | worker)" >&2 + exit 1 + ;; +esac diff --git a/surfsense_backend/tests/e2e/README.md b/surfsense_backend/tests/e2e/README.md index 800d61dfb..caa0f89b0 100644 --- a/surfsense_backend/tests/e2e/README.md +++ b/surfsense_backend/tests/e2e/README.md @@ -1,48 +1,48 @@ -# Backend E2E Test Harness +# Backend E2E Harness -Strict fakes + alternative entrypoints used **only** by Playwright E2E. -Excluded from the production Docker image via `.dockerignore`. +This directory contains the test-only backend entrypoints and fakes used by +Playwright. They are not part of the production image: `.dockerignore` excludes +`tests/`, and the E2E Docker stage copies this directory through a separate +build context. ## Files -| Path | Role | -| -------------------------------- | ------------------------------------------------------------------------------- | -| `run_backend.py` | FastAPI entrypoint that hijacks `sys.modules` before importing `app.app:app` | -| `run_celery.py` | Celery worker entrypoint with the same hijack + patch logic | -| `middleware/scenario.py` | `X-E2E-Scenario` header → ContextVar (read by fakes) | -| `fakes/composio_module.py` | Strict drop-in for the `composio` package; raises on unknown surface | -| `fakes/llm.py` | `fake_get_user_long_context_llm` returning a `FakeListChatModel` | -| `fakes/embeddings.py` | Deterministic 0.1-vector `embed_text` / `embed_texts` | -| `fakes/fixtures/drive_files.json`| Canned Drive listings + file contents (incl. canary tokens) | +| Path | Purpose | +| --- | --- | +| `run_backend.py` | Starts FastAPI after installing the test fakes into `sys.modules`. | +| `run_celery.py` | Starts the Celery worker with the same fake setup. | +| `middleware/scenario.py` | Reads `X-E2E-Scenario` into a request-scoped context var. | +| `fakes/composio_module.py` | Fake `composio` package used by connector flows. | +| `fakes/llm.py` | Fake chat model factory. | +| `fakes/embeddings.py` | Deterministic embedding helpers. | +| `fakes/fixtures/drive_files.json` | Drive fixture data and canary file contents. | -## Why a sys.modules hijack? +## Why the import hook exists -Production code does `from composio import Composio` at module load -time. By the time the FastAPI app object exists, that binding has -already been resolved. The hijack runs **before** any `app.*` import, -so the binding resolves to our strict fake. No production source -changes; fakes are physically excluded from production images. +Some production modules import SDK clients at module load time, for example +`from composio import Composio`. By the time `app.app` has been imported, those +bindings are already fixed. -Belt + suspenders + no internet: the strict `__getattr__` in every -fake raises `NotImplementedError` if a future production code path -introduces a new SDK call. CI also sets `HTTPS_PROXY=http://127.0.0.1:1` -plus sentinel API keys so any leaked outbound HTTP fails immediately. +The E2E entrypoints install fake modules in `sys.modules` before importing any +`app.*` module. That lets the normal production code run while SDK calls resolve +to local fakes. -## Adding a new fake +The fakes should fail loudly. If production starts using a new SDK method that +the fake does not implement, add that method to the fake instead of letting the +test call the real service. -1. Create `fakes/_module.py` modelled on `composio_module.py`. -2. In `run_backend.py` and `run_celery.py`, register - `sys.modules[""] = _fake_` before the `from app.app import app` - line. -3. If the new fake needs scenario branching, read from +## Adding a fake + +1. Add `fakes/_module.py`. +2. Register it in both `run_backend.py` and `run_celery.py` before importing + `app.app` or `app.celery_app`. +3. If the fake needs per-test behavior, read the current scenario from `tests.e2e.middleware.scenario.current_scenario()`. -## Reused by backend integration tests +## Shared with backend integration tests -The strict fakes are not only for Playwright. Backend route integration -tests can import the same fake before importing `app.app`, so Composio -route tests exercise production route code without touching the real -SDK: +Backend integration tests can use the same fakes when they need production route +code without the real SDK: ```python from tests.e2e.fakes import composio_module as _fake_composio @@ -50,20 +50,93 @@ sys.modules["composio"] = _fake_composio from app.app import app ``` -See `surfsense_backend/tests/integration/composio/conftest.py` for the -current pattern. +See `surfsense_backend/tests/integration/composio/conftest.py` for the current +pattern. ## Running locally +The recommended local flow runs only Postgres and Redis in Docker, and the +backend + Celery worker on the host. No `.env` file is required: both +entrypoints `setdefault` every variable they need (DB URL, Redis URL, +sentinel API keys, etc.) to values that match `docker-compose.deps-only.yml`. + +### One-time setup + +From `surfsense_web/`: + ```bash -cd surfsense_backend +pnpm install +pnpm exec playwright install --with-deps chromium +``` + +### Each run + +**1. Bring up Postgres + Redis** from the repo root (the other deps-only +services (SearXNG, Zero, pgAdmin) are not needed for E2E): + +```bash +docker compose -f docker/docker-compose.deps-only.yml up -d db redis +``` + +**2. Start the backend** in `surfsense_backend/`, terminal A: + +```bash +uv sync +uv run alembic upgrade head uv run python tests/e2e/run_backend.py -# in a second shell: +``` + +**3. Start the Celery worker** in `surfsense_backend/`, terminal B: + +```bash uv run python tests/e2e/run_celery.py ``` -Then in `surfsense_web`: +**4. Register the Playwright user**: ```bash -pnpm test:e2e +curl -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}' ``` + +**5. Run Playwright** from `surfsense_web/`, terminal C: + +```bash +pnpm test:e2e # dev server (fast iteration) +pnpm test:e2e:headed # show the browser +pnpm test:e2e:ui # Playwright UI mode +pnpm test:e2e:prod # build + start (matches CI exactly) +``` + +`playwright.config.ts` and the run scripts share defaults, so this works on a +fresh checkout. Set `PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`, +`NEXT_PUBLIC_FASTAPI_BACKEND_URL`, or any backend env (e.g. `DATABASE_URL`) +only when pointing tests at a different stack. + +### Cleanup + +```bash +docker compose -f docker/docker-compose.deps-only.yml down +``` + +Add `-v` to also wipe the Postgres volume. + +### Hermetic alternative (matches CI) + +To reproduce the CI environment exactly — backend and Celery in containers, +network egress denied at L3 — replace steps 1–3 with: + +```bash +docker compose -f docker/docker-compose.e2e.yml up -d --build --wait +``` + +Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear +down with: + +```bash +docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans +``` + +This builds the ~9 GB `surfsense-e2e-backend:local` image, so the deps-only +flow above is faster for day-to-day development. diff --git a/surfsense_backend/tests/e2e/auth_mint.py b/surfsense_backend/tests/e2e/auth_mint.py new file mode 100644 index 000000000..f489ed274 --- /dev/null +++ b/surfsense_backend/tests/e2e/auth_mint.py @@ -0,0 +1,66 @@ +"""Test-only token mint endpoint for the E2E backend entrypoint. + +Mounted by ``tests/e2e/run_backend.py`` so Playwright can authenticate +the seeded e2e user without hitting ``/auth/jwt/login`` (rate-limited +to 5/min/IP in production). NEVER ships to production: this whole +``tests/`` tree is excluded from the production Docker image by +``surfsense_backend/.dockerignore``. + +Authn: shared secret in ``X-E2E-Mint-Secret``. Same value is set on the +backend container env (``docker/docker-compose.e2e.yml``) and exported +to the Playwright runner (``.github/workflows/e2e-tests.yml``). +""" + +from __future__ import annotations + +import logging +import os + +from fastapi import APIRouter, FastAPI, Header, HTTPException +from pydantic import BaseModel +from sqlalchemy import select + +from app.db import User, async_session_maker +from app.users import get_jwt_strategy + +_logger = logging.getLogger("surfsense.e2e.auth_mint") + + +class MintRequest(BaseModel): + email: str = "e2e-test@surfsense.net" + + +class MintResponse(BaseModel): + access_token: str + token_type: str = "bearer" + + +def _expected_secret() -> str: + return os.environ.get("E2E_MINT_SECRET", "local-e2e-mint-secret-not-for-production") + + +router = APIRouter(prefix="/__e2e__", tags=["__e2e__"]) + + +@router.post("/auth/token", response_model=MintResponse) +async def mint_test_token( + body: MintRequest, + x_e2e_mint_secret: str = Header(..., alias="X-E2E-Mint-Secret"), +) -> MintResponse: + if x_e2e_mint_secret != _expected_secret(): + raise HTTPException(status_code=403, detail="invalid e2e mint secret") + async with async_session_maker() as session: + result = await session.execute(select(User).where(User.email == body.email)) + user = result.scalar_one_or_none() + if user is None: + raise HTTPException( + status_code=404, detail=f"e2e user {body.email!r} not seeded" + ) + token = await get_jwt_strategy().write_token(user) + return MintResponse(access_token=token) + + +def install(app: FastAPI) -> None: + """Mount the test-only mint router onto the given FastAPI app.""" + app.include_router(router) + _logger.warning("[e2e] mounted POST /__e2e__/auth/token (test-only token mint)") diff --git a/surfsense_backend/tests/e2e/fakes/docling_service.py b/surfsense_backend/tests/e2e/fakes/docling_service.py new file mode 100644 index 000000000..9dd09d603 --- /dev/null +++ b/surfsense_backend/tests/e2e/fakes/docling_service.py @@ -0,0 +1,141 @@ +"""Stub DoclingService.process_document for E2E. + +The real ``DoclingService.process_document`` calls +``DocumentConverter.convert(file_path)`` which lazily downloads the +``docling-project/docling-layout-heron`` model from Hugging Face Hub. +The hermetic E2E container sets ``HF_HUB_OFFLINE=1`` (see +``docker/docker-compose.e2e.yml``), so that download fails with +``LocalEntryNotFoundError`` and the indexing Celery task retries until +the Playwright test hits its ~4-minute step timeout. In CI that is the +difference between the suite finishing and the 30-minute job timeout +killing the run before any report can upload. + +Stubbing ``process_document`` bypasses ``DocumentConverter.convert()`` +entirely. ``DoclingService.__init__`` is intentionally left untouched +because constructing ``DocumentConverter(...)`` is cheap and offline — +it is only ``.convert()`` that triggers the offline-model download. + +Every canary PDF under ``tests/e2e/fakes/fixtures/binary/`` is produced +by ``generate_canary_pdfs.py`` and embeds its canary token as plain +``(text) Tj`` PDF text operators. Extracting those operators gives us +the canary string back, which is what the Playwright assertions look +for in the resulting Document row. +""" + +from __future__ import annotations + +import logging +import re +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# Matches the `(escaped text) Tj` text-show operator emitted by +# generate_canary_pdfs.py. Inside the parens, the escape rules are: +# \\ -> backslash +# \( -> literal ( +# \) -> literal ) +# The character class [^\\()] consumes any non-escape byte; \\. consumes +# an escape sequence. Sufficient for our synthetic fixtures. +_TJ_PATTERN = re.compile(rb"\(((?:[^\\()]|\\.)*)\)\s*Tj") + + +def _extract_text_from_synthetic_pdf(file_path: str) -> str: + """Pull every ``(text) Tj`` payload out of a fixture PDF in order. + + Returns an empty string if the file cannot be read. We do not try to + handle arbitrary PDFs because the fake is only ever invoked against + fixtures we generate ourselves. + """ + try: + data = Path(file_path).read_bytes() + except OSError as exc: + logger.warning("[fake-docling] could not read %s: %s", file_path, exc) + return "" + + lines: list[str] = [] + for match in _TJ_PATTERN.finditer(data): + raw = match.group(1) + # Order-sensitive unescape via sentinel: protect `\\` first so + # the subsequent `\(` / `\)` passes do not corrupt it. + text = ( + raw.replace(rb"\\", b"\x00") + .replace(rb"\(", b"(") + .replace(rb"\)", b")") + .replace(b"\x00", b"\\") + ) + try: + lines.append(text.decode("utf-8")) + except UnicodeDecodeError: + lines.append(text.decode("latin-1")) + return "\n".join(lines) + + +async def fake_process_document( + self, + file_path: str, + filename: str | None = None, +) -> dict[str, Any]: + """Drop-in replacement for ``DoclingService.process_document``. + + Returns the same dict shape as the production method so callers + (``app/etl_pipeline/parsers/docling.py``) can keep reading + ``result["content"]`` without changes. + """ + extracted = _extract_text_from_synthetic_pdf(file_path) + display_name = filename or Path(file_path).name + + if extracted: + content = f"# {display_name}\n\n{extracted}\n" + else: + # Empty fallback so the indexing pipeline does not error out on + # an unexpected payload. A failing canary assertion is a much + # clearer failure mode than a hard parser exception. + content = ( + f"# {display_name}\n\n(empty docling fake — no text-show operators found)\n" + ) + + logger.info( + "[fake-docling] returning %d chars for %s", + len(content), + display_name, + ) + + return { + "content": content, + "full_text": content, + "service_used": "docling-fake", + "status": "success", + "processing_notes": "e2e fake DoclingService — no real PDF parsing", + } + + +def install(patches: list[Any]) -> None: + """Patch ``DoclingService.process_document`` at the class level. + + Patching the class method (rather than each call site) is correct + here because every consumer goes through + ``create_docling_service()`` → ``DoclingService()`` → instance method + dispatch, so the descriptor protocol picks up our replacement. There + is exactly one such consumer today + (``app/etl_pipeline/parsers/docling.py``), but patching the class is + future-proof. + + Fails loud rather than warning, because a silent passthrough means + real Docling + ``HF_HUB_OFFLINE=1`` = 4 minutes of CI hang per test. + """ + from unittest.mock import patch as _patch + + target = "app.services.docling_service.DoclingService.process_document" + try: + p = _patch(target, fake_process_document) + p.start() + patches.append(p) + logger.info("[fake-docling] patched %s", target) + except (ModuleNotFoundError, AttributeError) as exc: + raise RuntimeError( + f"Could not patch Docling binding {target!r}: {exc!s}. " + f"Update surfsense_backend/tests/e2e/fakes/docling_service.py " + f"to point at the new binding site." + ) from exc diff --git a/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml new file mode 100644 index 000000000..017fa1eb3 --- /dev/null +++ b/surfsense_backend/tests/e2e/fixtures/global_llm_config.yaml @@ -0,0 +1,71 @@ +# Synthetic Global LLM configuration for E2E ONLY. +# +# Why this file exists: +# surfsense_backend/app/config/global_llm_config.yaml is gitignored +# (operators ship real API keys there). In CI that file does not exist, +# so app.config.load_global_llm_configs() returns [], every chat-stream +# test fails fast with "No usable global LLM configs are available for +# Auto mode" raised by auto_model_pin_service._global_candidates(). +# +# What this file does: +# tests/e2e/run_backend.py and tests/e2e/run_celery.py copy this file +# to app/config/global_llm_config.yaml at startup, BEFORE app.config +# is imported. The copy lives only inside the E2E Docker container. +# +# Why a fake api_key is safe: +# tests.e2e.fakes.chat_llm patches +# app.tasks.chat.stream_new_chat.create_chat_litellm_from_agent_config +# app.tasks.chat.stream_new_chat.create_chat_litellm_from_config +# so the resolved auto-pin id is never sent to a real LLM provider. +# The values below only need to pass +# auto_model_pin_service._is_usable_global_config() +# which requires id / model_name / provider / api_key all truthy. +# +# Why TWO entries (premium + free): +# auto_model_pin_service.resolve_or_get_pinned_llm_config_id() splits +# candidates by billing_tier based on _is_premium_eligible(user): +# premium_eligible == True -> keeps only tier=="premium" configs +# premium_eligible == False -> keeps only tier!="premium" configs +# A single-tier fixture would fail one of the two branches with +# "Auto mode could not find an eligible LLM config for this user and +# quota state". Shipping one of each guarantees every quota state +# resolves to a viable pin in E2E. + +router_settings: + routing_strategy: "simple-shuffle" + num_retries: 0 + allowed_fails: 1 + cooldown_time: 1 + +global_llm_configs: + - id: -9001 + name: "E2E Fake Auto Model (premium)" + billing_tier: "premium" + anonymous_enabled: false + seo_enabled: false + quality_score: 1.0 + provider: "OPENAI" + model_name: "fake-e2e-model-premium" + api_key: "fake-e2e-api-key-not-for-production" + supports_image_input: false + quota_reserve_tokens: 1024 + rpm: 1000 + tpm: 100000 + litellm_params: + model: "openai/fake-e2e-model-premium" + + - id: -9002 + name: "E2E Fake Auto Model (free)" + billing_tier: "free" + anonymous_enabled: false + seo_enabled: false + quality_score: 1.0 + provider: "OPENAI" + model_name: "fake-e2e-model-free" + api_key: "fake-e2e-api-key-not-for-production" + supports_image_input: false + quota_reserve_tokens: 1024 + rpm: 1000 + tpm: 100000 + litellm_params: + model: "openai/fake-e2e-model-free" diff --git a/surfsense_backend/tests/e2e/run_backend.py b/surfsense_backend/tests/e2e/run_backend.py index 4156a4ea4..5a787ac52 100644 --- a/surfsense_backend/tests/e2e/run_backend.py +++ b/surfsense_backend/tests/e2e/run_backend.py @@ -23,15 +23,12 @@ Usage: from __future__ import annotations +import asyncio import logging import os import sys -# --------------------------------------------------------------------------- -# 1) Hijack sys.modules BEFORE any production import. -# Production: composio_service.py:11 does `from composio import Composio`. -# With this hijack in place, that import resolves to our strict fake. -# --------------------------------------------------------------------------- +import uvicorn # Make the surfsense_backend root importable as a top-level package so # `import tests.e2e.fakes...` works regardless of how the entrypoint is @@ -42,97 +39,175 @@ _BACKEND_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) if _BACKEND_ROOT not in sys.path: sys.path.insert(0, _BACKEND_ROOT) -import tests.e2e.fakes.composio_module as _fake_composio # noqa: E402 -import tests.e2e.fakes.notion_module as _fake_notion # noqa: E402 -sys.modules["composio"] = _fake_composio -sys.modules["notion_client"] = _fake_notion -sys.modules["notion_client.errors"] = _fake_notion.errors - - -# --------------------------------------------------------------------------- -# 2) Standard logging + dotenv so the rest of the app behaves like main.py. -# --------------------------------------------------------------------------- - -from dotenv import load_dotenv # noqa: E402 - -load_dotenv() -os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id") -os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret") -os.environ.setdefault( - "CONFLUENCE_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/confluence/connector/callback", -) -os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id") -os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret") -os.environ.setdefault( - "NOTION_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/notion/connector/callback", -) -os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id") -os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret") -os.environ.setdefault( - "ONEDRIVE_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/onedrive/connector/callback", -) -os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key") -os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret") -os.environ.setdefault( - "DROPBOX_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/dropbox/connector/callback", -) -os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id" -os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret" - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", -) logger = logging.getLogger("surfsense.e2e.backend") -logger.warning( - "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***" -) - - -# --------------------------------------------------------------------------- -# 3) Now import the production app. Every module in app.* loads here, -# creating their bindings (some of which we will patch in step 4). -# --------------------------------------------------------------------------- - -# --------------------------------------------------------------------------- -# 4) Patch LLM + embedding bindings at every consumer site. -# Composio is already covered by the sys.modules hijack in step 1. -# --------------------------------------------------------------------------- -from unittest.mock import patch # noqa: E402 - -from app.app import app # noqa: E402 -from tests.e2e.fakes import ( # noqa: E402 - clickup_module as _fake_clickup_module, - confluence_indexer as _fake_confluence_indexer, - confluence_oauth as _fake_confluence_oauth, - dropbox_api as _fake_dropbox_api, - embeddings as _fake_embeddings, - jira_module as _fake_jira_module, - linear_module as _fake_linear_module, - mcp_oauth_runtime as _fake_mcp_oauth_runtime, - mcp_runtime as _fake_mcp_runtime, - native_google as _fake_native_google, - notion_module as _fake_notion_module, - onedrive_graph as _fake_onedrive_graph, - slack_module as _fake_slack_module, -) -from tests.e2e.fakes.chat_llm import ( # noqa: E402 - fake_create_chat_litellm_from_agent_config, - fake_create_chat_litellm_from_config, -) -from tests.e2e.fakes.llm import fake_get_user_long_context_llm # noqa: E402 +# Patches started during bootstrap are kept alive for the lifetime of the +# process. We never call .stop() on them. _active_patches: list = [] +def _hijack_external_sdks() -> None: + """Replace composio + notion_client in sys.modules. + + Production does ``from composio import Composio`` and + ``import notion_client`` at import time. With this hijack in place, + those imports resolve to our strict fakes. + + MUST run before _import_production_app(). + """ + import tests.e2e.fakes.composio_module as _fake_composio + import tests.e2e.fakes.notion_module as _fake_notion + + sys.modules["composio"] = _fake_composio + sys.modules["notion_client"] = _fake_notion + sys.modules["notion_client.errors"] = _fake_notion.errors + + +def _load_dotenv_and_set_env_defaults() -> None: + """Load .env and set every env var the production config reads on import. + + MUST run before _import_production_app(), since app.config consumes + these values at import time. + """ + from dotenv import load_dotenv + + load_dotenv() + + os.environ.setdefault( + "DATABASE_URL", + "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense", + ) + os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0") + os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") + os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0") + os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense") + os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production") + os.environ.setdefault("AUTH_TYPE", "LOCAL") + os.environ.setdefault("REGISTRATION_ENABLED", "TRUE") + os.environ.setdefault("ETL_SERVICE", "DOCLING") + os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2") + os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000") + + # Sentinel keys — fakes never read them; turns leaked real calls into 401s. + os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("COMPOSIO_ENABLED", "TRUE") + os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel") + + os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id") + os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret") + os.environ.setdefault( + "CONFLUENCE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/confluence/connector/callback", + ) + os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id") + os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret") + os.environ.setdefault( + "NOTION_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/notion/connector/callback", + ) + os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id") + os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret") + os.environ.setdefault( + "ONEDRIVE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/onedrive/connector/callback", + ) + os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key") + os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret") + os.environ.setdefault( + "DROPBOX_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/dropbox/connector/callback", + ) + # Native Google OAuth — fake Flow in tests.e2e.fakes.native_google + # raises "Fake Google Flow requires redirect_uri." if these are empty, + # so connector/add routes return 500 in CI where no .env supplies them. + os.environ.setdefault( + "GOOGLE_DRIVE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/drive/connector/callback", + ) + os.environ.setdefault( + "GOOGLE_GMAIL_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/gmail/connector/callback", + ) + os.environ.setdefault( + "GOOGLE_CALENDAR_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/calendar/connector/callback", + ) + os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id" + os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret" + + +def _install_synthetic_global_llm_config() -> None: + """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E. + + The real file is gitignored (production operators ship their own with + real API keys), so a fresh CI checkout has no YAML at the path + ``app.config.load_global_llm_configs()`` reads. With an empty + ``GLOBAL_LLM_CONFIGS`` list, ``auto_model_pin_service`` raises + ``"No usable global LLM configs are available for Auto mode"`` on + every chat-stream request. + + We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the + production-expected location BEFORE ``_import_production_app()`` so + ``app.config`` picks it up on import. Production code is untouched — + this is purely a test-time scaffold. + + Only installs when the destination is missing. A developer running + the E2E entrypoint locally keeps their real ``global_llm_config.yaml`` + intact (the patched ``create_chat_litellm_from_*`` factories make the + actual model values irrelevant either way). + + MUST run before _import_production_app(). + """ + import shutil + + src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml") + dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml") + + if not os.path.exists(src): + raise RuntimeError( + f"E2E synthetic global LLM config fixture missing at {src!r}. " + f"This file is checked into tests/e2e/fixtures/ — if it has gone " + f"missing, restore it from VCS before running the E2E entrypoint." + ) + + if os.path.exists(dst): + logger.info( + "[e2e-global-llm-config] %s already exists; leaving it alone " + "(local dev config preserved)", + dst, + ) + return + + os.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copyfile(src, dst) + logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst) + + +def _import_production_app(): + """Import and return the production FastAPI app. + + Every module under ``app.*`` loads here, creating their bindings. + The LLM/embedding factories captured at this point will be replaced + by patches in _patch_llm_bindings() below. + """ + from app.app import app as production_app + + return production_app + + def _patch_llm_bindings() -> None: """Replace LLM factories at every known binding site.""" + from unittest.mock import patch + + from tests.e2e.fakes.chat_llm import ( + fake_create_chat_litellm_from_agent_config, + fake_create_chat_litellm_from_config, + ) + from tests.e2e.fakes.llm import fake_get_user_long_context_llm + targets = [ "app.services.llm_service.get_user_long_context_llm", "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm", @@ -190,38 +265,90 @@ def _patch_llm_bindings() -> None: logger.warning("[fake-chat-llm] could not patch %s: %s.", target, exc) -_patch_llm_bindings() -_fake_embeddings.install(_active_patches) -_fake_confluence_oauth.install(_active_patches) -_fake_confluence_indexer.install(_active_patches) -_fake_native_google.install(_active_patches) -_fake_onedrive_graph.install(_active_patches) -_fake_dropbox_api.install(_active_patches) -_fake_notion_module.install(_active_patches) -_fake_linear_module.install(_active_patches) -_fake_jira_module.install(_active_patches) -_fake_clickup_module.install(_active_patches) -_fake_mcp_runtime.install(_active_patches) -_fake_mcp_oauth_runtime.install(_active_patches) -_fake_slack_module.install(_active_patches) +def _install_runtime_fakes() -> None: + """Run each fake's install() against the active patch stack.""" + from tests.e2e.fakes import ( + clickup_module as _fake_clickup_module, + confluence_indexer as _fake_confluence_indexer, + confluence_oauth as _fake_confluence_oauth, + docling_service as _fake_docling_service, + dropbox_api as _fake_dropbox_api, + embeddings as _fake_embeddings, + jira_module as _fake_jira_module, + linear_module as _fake_linear_module, + mcp_oauth_runtime as _fake_mcp_oauth_runtime, + mcp_runtime as _fake_mcp_runtime, + native_google as _fake_native_google, + notion_module as _fake_notion_module, + onedrive_graph as _fake_onedrive_graph, + slack_module as _fake_slack_module, + ) + + _fake_embeddings.install(_active_patches) + _fake_docling_service.install(_active_patches) + _fake_confluence_oauth.install(_active_patches) + _fake_confluence_indexer.install(_active_patches) + _fake_native_google.install(_active_patches) + _fake_onedrive_graph.install(_active_patches) + _fake_dropbox_api.install(_active_patches) + _fake_notion_module.install(_active_patches) + _fake_linear_module.install(_active_patches) + _fake_jira_module.install(_active_patches) + _fake_clickup_module.install(_active_patches) + _fake_mcp_runtime.install(_active_patches) + _fake_mcp_oauth_runtime.install(_active_patches) + _fake_slack_module.install(_active_patches) -# --------------------------------------------------------------------------- -# 5) Mount test-only middleware. Production never reaches this code. -# --------------------------------------------------------------------------- +def _install_test_only_app_extensions(app) -> None: + """Mount test-only middleware + the /__e2e__ token mint router. -from tests.e2e.middleware.scenario import ScenarioMiddleware # noqa: E402 + POST /__e2e__/auth/token bypasses /auth/jwt/login's 5/min/IP rate + limit so Playwright workers can authenticate without thrashing the + production auth surface. See tests/e2e/auth_mint.py. + """ + from tests.e2e.auth_mint import install as install_e2e_mint + from tests.e2e.middleware.scenario import ScenarioMiddleware -app.add_middleware(ScenarioMiddleware) + app.add_middleware(ScenarioMiddleware) + install_e2e_mint(app) -# --------------------------------------------------------------------------- -# 6) Start uvicorn, mirroring main.py's behaviour. -# --------------------------------------------------------------------------- +def _bootstrap(): + """Run the full E2E bootstrap and return the production FastAPI app. -import asyncio # noqa: E402 + Ordering is load-bearing: + 1) Hijack composio + notion_client in sys.modules. + 2) Load .env + set env defaults (app.config reads env on import). + 3) Configure logging. + 4) Materialise the synthetic global_llm_config.yaml so Auto-mode + pin resolution finds at least one usable candidate. + 5) Import production app (which transitively imports the now-faked + external SDKs and reads the env defaults + YAML). + 6) Patch LLM / embedding bindings at every consumer site. + 7) Mount test-only middleware + /__e2e__ routes onto the app. + """ + _hijack_external_sdks() + _load_dotenv_and_set_env_defaults() -import uvicorn # noqa: E402 + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + logger.warning( + "*** SURFSENSE E2E BACKEND ENTRYPOINT — fake Composio + LLM + embeddings ***" + ) + + _install_synthetic_global_llm_config() + production_app = _import_production_app() + _patch_llm_bindings() + _install_runtime_fakes() + _install_test_only_app_extensions(production_app) + return production_app + + +app = _bootstrap() def _main() -> None: diff --git a/surfsense_backend/tests/e2e/run_celery.py b/surfsense_backend/tests/e2e/run_celery.py index 407fecde3..e4091d689 100644 --- a/surfsense_backend/tests/e2e/run_celery.py +++ b/surfsense_backend/tests/e2e/run_celery.py @@ -25,96 +25,166 @@ if _BACKEND_ROOT not in sys.path: sys.path.insert(0, _BACKEND_ROOT) -# --------------------------------------------------------------------------- -# 1) Hijack sys.modules BEFORE production celery imports anything. -# --------------------------------------------------------------------------- - -import tests.e2e.fakes.composio_module as _fake_composio # noqa: E402 -import tests.e2e.fakes.notion_module as _fake_notion # noqa: E402 - -sys.modules["composio"] = _fake_composio -sys.modules["notion_client"] = _fake_notion -sys.modules["notion_client.errors"] = _fake_notion.errors - - -# --------------------------------------------------------------------------- -# 2) Logging + dotenv. -# --------------------------------------------------------------------------- - -from dotenv import load_dotenv # noqa: E402 - -load_dotenv() -os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id") -os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret") -os.environ.setdefault( - "CONFLUENCE_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/confluence/connector/callback", -) -os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id") -os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret") -os.environ.setdefault( - "NOTION_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/notion/connector/callback", -) -os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id") -os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret") -os.environ.setdefault( - "ONEDRIVE_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/onedrive/connector/callback", -) -os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key") -os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret") -os.environ.setdefault( - "DROPBOX_REDIRECT_URI", - "http://localhost:8000/api/v1/auth/dropbox/connector/callback", -) -os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id" -os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret" - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", -) logger = logging.getLogger("surfsense.e2e.celery") -logger.warning("*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***") - - -# --------------------------------------------------------------------------- -# 3) Import the production celery_app. All task modules load here. -# --------------------------------------------------------------------------- - -# --------------------------------------------------------------------------- -# 4) Patch LLM + embedding bindings inside the worker process. -# --------------------------------------------------------------------------- -from unittest.mock import patch # noqa: E402 - -from app.celery_app import celery_app # noqa: E402 -from tests.e2e.fakes import ( # noqa: E402 - clickup_module as _fake_clickup_module, - confluence_indexer as _fake_confluence_indexer, - confluence_oauth as _fake_confluence_oauth, - dropbox_api as _fake_dropbox_api, - embeddings as _fake_embeddings, - jira_module as _fake_jira_module, - linear_module as _fake_linear_module, - mcp_oauth_runtime as _fake_mcp_oauth_runtime, - mcp_runtime as _fake_mcp_runtime, - native_google as _fake_native_google, - notion_module as _fake_notion_module, - onedrive_graph as _fake_onedrive_graph, - slack_module as _fake_slack_module, -) -from tests.e2e.fakes.chat_llm import ( # noqa: E402 - fake_create_chat_litellm_from_agent_config, - fake_create_chat_litellm_from_config, -) -from tests.e2e.fakes.llm import fake_get_user_long_context_llm # noqa: E402 +# Patches started during bootstrap are kept alive for the lifetime of the +# process. We never call .stop() on them. _active_patches: list = [] +def _hijack_external_sdks() -> None: + """Replace composio + notion_client in sys.modules. + + Production does ``from composio import Composio`` and + ``import notion_client`` at import time. With this hijack in place, + those imports resolve to our strict fakes. + + MUST run before _import_celery_app(). + """ + import tests.e2e.fakes.composio_module as _fake_composio + import tests.e2e.fakes.notion_module as _fake_notion + + sys.modules["composio"] = _fake_composio + sys.modules["notion_client"] = _fake_notion + sys.modules["notion_client.errors"] = _fake_notion.errors + + +def _load_dotenv_and_set_env_defaults() -> None: + """Load .env and set every env var the production config reads on import. + + MUST run before _import_celery_app(), since app.config consumes + these values at import time. + """ + from dotenv import load_dotenv + + load_dotenv() + + os.environ.setdefault( + "DATABASE_URL", + "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense", + ) + os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0") + os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") + os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0") + os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense") + os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production") + os.environ.setdefault("AUTH_TYPE", "LOCAL") + os.environ.setdefault("REGISTRATION_ENABLED", "TRUE") + os.environ.setdefault("ETL_SERVICE", "DOCLING") + os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2") + os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000") + + # Sentinel keys — fakes never read them; turns leaked real calls into 401s. + os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("COMPOSIO_ENABLED", "TRUE") + os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel") + os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel") + + os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id") + os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret") + os.environ.setdefault( + "CONFLUENCE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/confluence/connector/callback", + ) + os.environ.setdefault("NOTION_CLIENT_ID", "fake-notion-client-id") + os.environ.setdefault("NOTION_CLIENT_SECRET", "fake-notion-client-secret") + os.environ.setdefault( + "NOTION_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/notion/connector/callback", + ) + os.environ.setdefault("MICROSOFT_CLIENT_ID", "fake-microsoft-client-id") + os.environ.setdefault("MICROSOFT_CLIENT_SECRET", "fake-microsoft-client-secret") + os.environ.setdefault( + "ONEDRIVE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/onedrive/connector/callback", + ) + os.environ.setdefault("DROPBOX_APP_KEY", "fake-dropbox-app-key") + os.environ.setdefault("DROPBOX_APP_SECRET", "fake-dropbox-app-secret") + os.environ.setdefault( + "DROPBOX_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/dropbox/connector/callback", + ) + # Native Google OAuth — fake Flow in tests.e2e.fakes.native_google raises + # "Fake Google Flow requires redirect_uri." when these are empty. + os.environ.setdefault( + "GOOGLE_DRIVE_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/drive/connector/callback", + ) + os.environ.setdefault( + "GOOGLE_GMAIL_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/gmail/connector/callback", + ) + os.environ.setdefault( + "GOOGLE_CALENDAR_REDIRECT_URI", + "http://localhost:8000/api/v1/auth/google/calendar/connector/callback", + ) + os.environ["SLACK_CLIENT_ID"] = "fake-slack-mcp-client-id" + os.environ["SLACK_CLIENT_SECRET"] = "fake-slack-mcp-client-secret" + + +def _install_synthetic_global_llm_config() -> None: + """Materialise a fake ``app/config/global_llm_config.yaml`` for E2E. + + The real file is gitignored (production operators ship their own with + real API keys), so a fresh CI checkout has no YAML at the path + ``app.config.load_global_llm_configs()`` reads. With an empty + ``GLOBAL_LLM_CONFIGS`` list, the worker's view of the config diverges + from the API container. + + We copy the synthetic fixture from ``tests/e2e/fixtures/`` into the + production-expected location BEFORE _import_celery_app() so + ``app.config`` picks it up on import. Install-only-if-missing so a + developer's local config (with real API keys) is preserved. + + MUST run before _import_celery_app(). + """ + import shutil + + src = os.path.join(_THIS_DIR, "fixtures", "global_llm_config.yaml") + dst = os.path.join(_BACKEND_ROOT, "app", "config", "global_llm_config.yaml") + + if not os.path.exists(src): + raise RuntimeError( + f"E2E synthetic global LLM config fixture missing at {src!r}. " + f"Restore tests/e2e/fixtures/global_llm_config.yaml from VCS." + ) + + if os.path.exists(dst): + logger.info( + "[e2e-global-llm-config] %s already exists; leaving it alone " + "(local dev config preserved)", + dst, + ) + return + + os.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copyfile(src, dst) + logger.info("[e2e-global-llm-config] installed %s -> %s", src, dst) + + +def _import_celery_app(): + """Import and return the production Celery app. + + Every module under ``app.*`` (including all task modules) loads here, + creating their bindings. The LLM/embedding factories captured at this + point will be replaced by patches in _patch_llm_bindings() below. + """ + from app.celery_app import celery_app + + return celery_app + + def _patch_llm_bindings() -> None: + """Replace LLM factories at every known binding site in worker tasks.""" + from unittest.mock import patch + + from tests.e2e.fakes.chat_llm import ( + fake_create_chat_litellm_from_agent_config, + fake_create_chat_litellm_from_config, + ) + from tests.e2e.fakes.llm import fake_get_user_long_context_llm + targets = [ "app.services.llm_service.get_user_long_context_llm", "app.tasks.connector_indexers.confluence_indexer.get_user_long_context_llm", @@ -172,38 +242,93 @@ def _patch_llm_bindings() -> None: ) -_patch_llm_bindings() -_fake_embeddings.install(_active_patches) -_fake_confluence_oauth.install(_active_patches) -_fake_confluence_indexer.install(_active_patches) -_fake_native_google.install(_active_patches) -_fake_onedrive_graph.install(_active_patches) -_fake_dropbox_api.install(_active_patches) -_fake_notion_module.install(_active_patches) -_fake_linear_module.install(_active_patches) -_fake_jira_module.install(_active_patches) -_fake_clickup_module.install(_active_patches) -_fake_mcp_runtime.install(_active_patches) -_fake_mcp_oauth_runtime.install(_active_patches) -_fake_slack_module.install(_active_patches) +def _install_runtime_fakes() -> None: + """Run each fake's install() against the active patch stack.""" + from tests.e2e.fakes import ( + clickup_module as _fake_clickup_module, + confluence_indexer as _fake_confluence_indexer, + confluence_oauth as _fake_confluence_oauth, + docling_service as _fake_docling_service, + dropbox_api as _fake_dropbox_api, + embeddings as _fake_embeddings, + jira_module as _fake_jira_module, + linear_module as _fake_linear_module, + mcp_oauth_runtime as _fake_mcp_oauth_runtime, + mcp_runtime as _fake_mcp_runtime, + native_google as _fake_native_google, + notion_module as _fake_notion_module, + onedrive_graph as _fake_onedrive_graph, + slack_module as _fake_slack_module, + ) + + _fake_embeddings.install(_active_patches) + _fake_docling_service.install(_active_patches) + _fake_confluence_oauth.install(_active_patches) + _fake_confluence_indexer.install(_active_patches) + _fake_native_google.install(_active_patches) + _fake_onedrive_graph.install(_active_patches) + _fake_dropbox_api.install(_active_patches) + _fake_notion_module.install(_active_patches) + _fake_linear_module.install(_active_patches) + _fake_jira_module.install(_active_patches) + _fake_clickup_module.install(_active_patches) + _fake_mcp_runtime.install(_active_patches) + _fake_mcp_oauth_runtime.install(_active_patches) + _fake_slack_module.install(_active_patches) -# --------------------------------------------------------------------------- -# 5) Start the worker. -# --------------------------------------------------------------------------- +def _bootstrap(): + """Run the full E2E bootstrap and return the production Celery app. + + Ordering is load-bearing: + 1) Hijack composio + notion_client in sys.modules. + 2) Load .env + set env defaults (app.config reads env on import). + 3) Configure logging. + 4) Materialise the synthetic global_llm_config.yaml so the worker's + view of GLOBAL_LLM_CONFIGS matches the API container. + 5) Import production celery_app (which transitively imports the + now-faked external SDKs and reads the env defaults + YAML). + 6) Patch LLM / embedding bindings at every consumer site. + 7) Install runtime fakes for connectors and chat backends. + """ + _hijack_external_sdks() + _load_dotenv_and_set_env_defaults() + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + logger.warning( + "*** SURFSENSE E2E CELERY WORKER — fake Composio + LLM + embeddings ***" + ) + + _install_synthetic_global_llm_config() + celery_app = _import_celery_app() + _patch_llm_bindings() + _install_runtime_fakes() + return celery_app + + +celery_app = _bootstrap() def _main() -> None: - # Default queues mirror production (default queue + connectors queue - # so Drive indexing tasks are picked up). queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense") queues = f"{queue_name},{queue_name}.connectors" + + # macOS forks-after-MPS-init crash prefork workers; threads avoid it. + default_pool = "threads" if sys.platform == "darwin" else "prefork" + pool = os.getenv("CELERY_POOL", default_pool) + concurrency = os.getenv("CELERY_CONCURRENCY", "2") + celery_app.worker_main( argv=[ "worker", "--loglevel=info", f"--queues={queues}", - "--concurrency=2", + f"--pool={pool}", + f"--concurrency={concurrency}", "--without-gossip", "--without-mingle", ] diff --git a/surfsense_desktop/package.json b/surfsense_desktop/package.json index 4ef624760..b1fff79a5 100644 --- a/surfsense_desktop/package.json +++ b/surfsense_desktop/package.json @@ -21,7 +21,7 @@ "email": "rohan@surfsense.com" }, "license": "MIT", - "packageManager": "pnpm@10.24.0", + "packageManager": "pnpm@10.26.0", "devDependencies": { "@electron/rebuild": "^4.0.3", "@types/node": "^25.5.0", diff --git a/surfsense_web/.gitignore b/surfsense_web/.gitignore index 6ae7fe0c4..3ae7683d3 100644 --- a/surfsense_web/.gitignore +++ b/surfsense_web/.gitignore @@ -12,6 +12,10 @@ # testing /coverage +/playwright/.auth/ +/playwright-report/ +/test-results/ +/blob-report/ # next.js /.next/ @@ -48,5 +52,4 @@ next-env.d.ts # source /.source/ -.pnpm-store/ - +.pnpm-store/ \ No newline at end of file diff --git a/surfsense_web/Dockerfile b/surfsense_web/Dockerfile index da6bc8b7e..0e3ed11de 100644 --- a/surfsense_web/Dockerfile +++ b/surfsense_web/Dockerfile @@ -12,7 +12,7 @@ WORKDIR /app RUN corepack enable pnpm # Copy package files -COPY package.json pnpm-lock.yaml* .npmrc* ./ +COPY package.json pnpm-lock.yaml* pnpm-workspace.yaml* .npmrc* ./ # First copy the config file and content to avoid fumadocs-mdx postinstall error COPY source.config.ts ./ diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index c431ab304..0ebd8dc9a 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -208,7 +208,10 @@ const MentionedDocumentInfoSchema = z.object({ id: z.number(), title: z.string(), document_type: z.string(), - kind: z.union([z.literal("doc"), z.literal("folder")]).optional().default("doc"), + kind: z + .union([z.literal("doc"), z.literal("folder")]) + .optional() + .default("doc"), }); const MentionedDocumentsPartSchema = z.object({ @@ -1029,9 +1032,7 @@ export default function NewChatPage() { mentioned_surfsense_doc_ids: hasSurfsenseDocIds ? mentionedDocumentIds.surfsense_doc_ids : undefined, - mentioned_folder_ids: hasFolderIds - ? mentionedDocumentIds.folder_ids - : undefined, + mentioned_folder_ids: hasFolderIds ? mentionedDocumentIds.folder_ids : undefined, // Full mention metadata (docs + folders, with // ``kind`` discriminator) so the BE can embed a // ``mentioned-documents`` ContentPart on the @@ -1900,12 +1901,10 @@ export default function NewChatPage() { filesystem_mode: selection.filesystem_mode, client_platform: selection.client_platform, local_filesystem_mounts: selection.local_filesystem_mounts, - mentioned_document_ids: - regenerateDocIds.length > 0 ? regenerateDocIds : undefined, + mentioned_document_ids: regenerateDocIds.length > 0 ? regenerateDocIds : undefined, mentioned_surfsense_doc_ids: regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined, - mentioned_folder_ids: - regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined, + mentioned_folder_ids: regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined, // Full mention metadata for the regenerate-specific // source list. Only meaningful for edit (the BE only // re-persists a user row when ``user_query`` is set); diff --git a/surfsense_web/atoms/chat/mentioned-documents.atom.ts b/surfsense_web/atoms/chat/mentioned-documents.atom.ts index eafdaf87e..9163960f4 100644 --- a/surfsense_web/atoms/chat/mentioned-documents.atom.ts +++ b/surfsense_web/atoms/chat/mentioned-documents.atom.ts @@ -97,9 +97,7 @@ export const mentionedDocumentIdsAtom = atom((get) => { surfsense_doc_ids: docs .filter((doc) => doc.document_type === "SURFSENSE_DOCS") .map((doc) => doc.id), - document_ids: docs - .filter((doc) => doc.document_type !== "SURFSENSE_DOCS") - .map((doc) => doc.id), + document_ids: docs.filter((doc) => doc.document_type !== "SURFSENSE_DOCS").map((doc) => doc.id), folder_ids: folders.map((f) => f.id), }; }); diff --git a/surfsense_web/biome.json b/surfsense_web/biome.json index 738a3636d..aa71f509e 100644 --- a/surfsense_web/biome.json +++ b/surfsense_web/biome.json @@ -7,7 +7,19 @@ }, "files": { "ignoreUnknown": true, - "includes": ["**", "!!node_modules", "!!.git", "!!.next", "!!dist", "!!build", "!!coverage"], + "includes": [ + "**", + "!!node_modules", + "!!.git", + "!!.next", + "!!dist", + "!!build", + "!!coverage", + "!!test-results", + "!!playwright-report", + "!!blob-report", + "!!playwright/.auth" + ], "maxSize": 1048576 }, "formatter": { diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index e12556486..c7893b6ac 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -47,10 +47,7 @@ export interface InlineMentionEditorRef { setText: (text: string) => void; getText: () => string; getMentionedDocuments: () => MentionedDocument[]; - insertMentionChip: ( - mention: MentionChipInput, - options?: { removeTriggerText?: boolean } - ) => void; + insertMentionChip: (mention: MentionChipInput, options?: { removeTriggerText?: boolean }) => void; /** * @deprecated Use ``insertMentionChip``. Kept for one transition * cycle so we don't break ad-hoc callers; prefer the new name. @@ -364,8 +361,7 @@ export const InlineMentionEditor = forwardRef - ) : ( - - ); + const icon = isFolder ? : ; const handleClick = useCallback( (event: React.MouseEvent) => { diff --git a/surfsense_web/components/assistant-ui/user-message.tsx b/surfsense_web/components/assistant-ui/user-message.tsx index b09aa7680..708cefbc0 100644 --- a/surfsense_web/components/assistant-ui/user-message.tsx +++ b/surfsense_web/components/assistant-ui/user-message.tsx @@ -111,11 +111,7 @@ const UserTextPart: FC = () => { icon={icon} label={segment.doc.title} tooltip={isFolder ? `Folder: ${segment.doc.title}` : segment.doc.title} - onClick={ - isFolder - ? undefined - : () => handleOpenDoc(segment.doc.id, segment.doc.title) - } + onClick={isFolder ? undefined : () => handleOpenDoc(segment.doc.id, segment.doc.title)} className="mx-0.5" /> ); diff --git a/surfsense_web/components/editor/plate-editor.tsx b/surfsense_web/components/editor/plate-editor.tsx index 51ad7d700..77845ad2a 100644 --- a/surfsense_web/components/editor/plate-editor.tsx +++ b/surfsense_web/components/editor/plate-editor.tsx @@ -170,16 +170,10 @@ export function PlateEditor({ : markdown ? (editor) => { if (!enableCitations) { - return safeDeserializeMarkdown( - editor, - escapeMdxExpressions(markdown) - ) as Value; + return safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown)) as Value; } const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown); - const value = safeDeserializeMarkdown( - editor, - escapeMdxExpressions(rewritten) - ); + const value = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten)); return injectCitationNodes(value, urlMap) as Value; } : undefined, @@ -203,10 +197,7 @@ export function PlateEditor({ let newValue: Descendant[]; if (enableCitations) { const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown); - const deserialized = safeDeserializeMarkdown( - editor, - escapeMdxExpressions(rewritten) - ); + const deserialized = safeDeserializeMarkdown(editor, escapeMdxExpressions(rewritten)); newValue = injectCitationNodes(deserialized, urlMap); } else { newValue = safeDeserializeMarkdown(editor, escapeMdxExpressions(markdown)); diff --git a/surfsense_web/components/editor/utils/safe-deserialize.ts b/surfsense_web/components/editor/utils/safe-deserialize.ts index e359a7791..8f3e6275b 100644 --- a/surfsense_web/components/editor/utils/safe-deserialize.ts +++ b/surfsense_web/components/editor/utils/safe-deserialize.ts @@ -49,10 +49,7 @@ export function safeDeserializeMarkdown( return api.deserialize(markdown, { remarkPlugins: STRICT_PLUGINS }) as Descendant[]; } catch (mdxError) { if (process.env.NODE_ENV !== "production") { - console.warn( - "[plate-editor] MDX parse failed, retrying without remark-mdx:", - mdxError - ); + console.warn("[plate-editor] MDX parse failed, retrying without remark-mdx:", mdxError); } try { return api.deserialize(markdown, { remarkPlugins: LENIENT_PLUGINS }) as Descendant[]; diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 958941928..3ecf046bb 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -24,10 +24,7 @@ import type React from "react"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom"; -import { - makeFolderMention, - mentionedDocumentsAtom, -} from "@/atoms/chat/mentioned-documents.atom"; +import { makeFolderMention, mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom"; import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms"; import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms"; import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms"; diff --git a/surfsense_web/components/new-chat/document-mention-picker.tsx b/surfsense_web/components/new-chat/document-mention-picker.tsx index 0881b11b6..0d68c8df8 100644 --- a/surfsense_web/components/new-chat/document-mention-picker.tsx +++ b/surfsense_web/components/new-chat/document-mention-picker.tsx @@ -301,8 +301,7 @@ export const DocumentMentionPicker = forwardRef< // folder entries lift the existing kind-aware key so the same // matchers used by the chip atom apply unchanged. const selectedKeys = useMemo( - () => - new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))), + () => new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))), [initialSelectedDocuments] ); @@ -583,9 +582,7 @@ export const DocumentMentionPicker = forwardRef< {(surfsenseDocsList.length > 0 || userDocsList.length > 0) && (
)} -
- Folders -
+
Folders
{folderMentions.map((folder) => { const folderKey = getMentionDocKey(folder); const isAlreadySelected = selectedKeys.has(folderKey); diff --git a/surfsense_web/package.json b/surfsense_web/package.json index fa8d50cdc..95894d2f2 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -2,6 +2,7 @@ "name": "surfsense_web", "version": "0.0.23", "private": true, + "packageManager": "pnpm@10.26.0", "description": "SurfSense Frontend", "scripts": { "dev": "next dev --turbopack", @@ -20,6 +21,7 @@ "db:studio": "drizzle-kit studio", "format:fix": "npx @biomejs/biome check --fix", "test:e2e": "playwright test", + "test:e2e:prod": "cross-env CI=1 playwright test", "test:e2e:ui": "playwright test --ui", "test:e2e:headed": "playwright test --headed", "test:e2e:debug": "playwright test --debug", diff --git a/surfsense_web/playwright.config.ts b/surfsense_web/playwright.config.ts index 511db6b09..ef066a9be 100644 --- a/surfsense_web/playwright.config.ts +++ b/surfsense_web/playwright.config.ts @@ -4,6 +4,11 @@ const PORT = process.env.PORT || "3000"; const BACKEND_PORT = process.env.BACKEND_PORT || "8000"; const baseURL = process.env.PLAYWRIGHT_BASE_URL || `http://localhost:${PORT}`; +process.env.PLAYWRIGHT_TEST_EMAIL ??= "e2e-test@surfsense.net"; +process.env.PLAYWRIGHT_TEST_PASSWORD ??= "E2eTestPassword123!"; +process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ??= `http://localhost:${BACKEND_PORT}`; +process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE ??= "LOCAL"; + /** * Playwright configuration for SurfSense web E2E tests. * @@ -22,8 +27,8 @@ export default defineConfig({ expect: { timeout: 15_000 }, fullyParallel: true, forbidOnly: !!process.env.CI, - retries: process.env.CI ? 2 : 0, - workers: process.env.CI ? 1 : undefined, + retries: process.env.CI ? 1 : 0, + workers: 1, reporter: process.env.CI ? [["html", { open: "never" }], ["github"], ["list"]] : [["html", { open: "on-failure" }], ["list"]], @@ -31,7 +36,7 @@ export default defineConfig({ baseURL, trace: "on-first-retry", screenshot: "only-on-failure", - video: "retain-on-failure", + video: process.env.CI ? "off" : "retain-on-failure", extraHTTPHeaders: { "x-playwright-test": "true", }, @@ -53,14 +58,16 @@ export default defineConfig({ webServer: process.env.PLAYWRIGHT_NO_WEB_SERVER ? undefined : { - // Pin to webpack dev (Turbopack has caused stale-lock panics in E2E). - command: "pnpm exec next dev", + // Local stays on webpack dev (Turbopack caused stale-lock panics in E2E). + command: process.env.CI ? "pnpm build && pnpm start" : "pnpm exec next dev", url: `http://localhost:${PORT}`, reuseExistingServer: !process.env.CI, - timeout: 180_000, + timeout: process.env.CI ? 300_000 : 180_000, + stdout: "pipe", + stderr: "pipe", env: { - NEXT_PUBLIC_FASTAPI_BACKEND_URL: `http://localhost:${BACKEND_PORT}`, - NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: "LOCAL", + NEXT_PUBLIC_FASTAPI_BACKEND_URL: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL, + NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE, }, }, }); diff --git a/surfsense_web/pnpm-workspace.yaml b/surfsense_web/pnpm-workspace.yaml new file mode 100644 index 000000000..5f1b93969 --- /dev/null +++ b/surfsense_web/pnpm-workspace.yaml @@ -0,0 +1,11 @@ +allowBuilds: + "@parcel/watcher": true + "@rocicorp/zero-sqlite3": true + "@swc/core": true + core-js: true + esbuild: true + protobufjs: true + sharp: true + unrs-resolver: true + +minimumReleaseAge: 10080 \ No newline at end of file diff --git a/surfsense_web/tests/README.md b/surfsense_web/tests/README.md index 51fd35050..89aab1f9b 100644 --- a/surfsense_web/tests/README.md +++ b/surfsense_web/tests/README.md @@ -5,29 +5,6 @@ Celery + Postgres + Redis). Designed to scale from one connector (Composio Drive in Phase 1) to every connector + manual file upload without rewriting the harness. -## Layout - -``` -tests/ -├── auth.setup.ts # one-time login, persists localStorage -├── smoke/ # tracer-bullet tests (dashboard renders) -├── connectors/ -│ └── composio/ -│ └── drive/ # Composio Google Drive — Phase 1 -│ └── journey.spec.ts # connect -> select -> index -> canary assertion -├── fixtures/ # test.extend() fixtures -│ ├── index.ts # named `test` exports per spec category -│ ├── search-space.fixture.ts # apiToken + per-test search space -│ └── connectors/ -│ └── composio-drive.fixture.ts -├── helpers/ # reusable building blocks -│ ├── api/ # backend HTTP helpers -│ ├── ui/ # page-object selectors -│ ├── waits/ # deterministic polling -│ └── canary.ts # canary tokens + fixed Drive file ids -└── README.md # this file -``` - ## How the deterministic harness works There are **three layers of defense** against accidental real-world @@ -47,26 +24,90 @@ calls. None of them touch production code. ## Running locally +The recommended flow runs only Postgres and Redis in Docker, and the backend ++ Celery worker on the host. The E2E entrypoints `setdefault` every backend +variable they need, so no `.env` file is required on a fresh checkout. + +### One-time setup + +From `surfsense_web/`: + ```bash -# 1. Bring up Postgres + Redis (Docker compose, supabase, whatever you use) -docker compose up -d postgres redis - -# 2. Backend with E2E entrypoint (note: NOT `uv run main.py`) -cd surfsense_backend -uv run alembic upgrade head -uv run python tests/e2e/run_backend.py & - -# 3. Celery worker with the same entrypoint pattern -uv run python tests/e2e/run_celery.py & - -# 4. Run Playwright tests (auto-starts `pnpm dev` via webServer config) -cd ../surfsense_web -pnpm test:e2e +pnpm install +pnpm exec playwright install --with-deps chromium ``` -For CI behavior in one go: `pnpm test:e2e:headless`. +### Each run -To debug the Drive journey: `pnpm test:e2e -- connectors/composio/drive/journey.spec.ts --headed`. +**1. Bring up Postgres + Redis** from the repo root: + +```bash +docker compose -f docker/docker-compose.deps-only.yml up -d db redis +``` + +**2. Start the backend** in `surfsense_backend/`, terminal A: + +```bash +uv sync +uv run alembic upgrade head +uv run python tests/e2e/run_backend.py +``` + +**3. Start the Celery worker** in `surfsense_backend/`, terminal B: + +```bash +uv run python tests/e2e/run_celery.py +``` + +**4. Register the Playwright user**: + +```bash +curl -X POST http://localhost:8000/auth/register \ + -H "Content-Type: application/json" \ + -d '{"email":"e2e-test@surfsense.net","password":"E2eTestPassword123!"}' +``` + +**5. Run Playwright** from `surfsense_web/`, terminal C: + +```bash +pnpm test:e2e # dev server (fast iteration) +pnpm test:e2e:headed # show the browser +pnpm test:e2e:ui # Playwright UI mode +pnpm test:e2e:debug # Playwright Inspector +pnpm test:e2e:prod # build + start (matches CI exactly) +pnpm test:e2e:report # open the last HTML report +``` + +`playwright.config.ts` and the backend run scripts share defaults, so the +above works without exporting any env vars. Override +`PLAYWRIGHT_TEST_EMAIL`, `PLAYWRIGHT_TEST_PASSWORD`, or +`NEXT_PUBLIC_FASTAPI_BACKEND_URL` only when pointing tests at a different +stack. + +To debug a single journey: + +```bash +pnpm test:e2e:headed connectors/composio/drive/journey.spec.ts +``` + +### Hermetic alternative (matches CI) + +To reproduce the CI environment exactly: backend and Celery in containers +with L3 egress denied, replace steps 1–3 with: + +```bash +docker compose -f docker/docker-compose.e2e.yml up -d --build --wait +``` + +Then run steps 4 (curl register) and 5 (`pnpm test:e2e:prod`) as above. Tear +down with: + +```bash +docker compose -f docker/docker-compose.e2e.yml down -v --remove-orphans +``` + +This builds the ~9 GB e2e backend image, so the deps-only flow is faster for +day-to-day work. ## Adding a new connector diff --git a/surfsense_web/tests/auth.setup.ts b/surfsense_web/tests/auth.setup.ts index e5d31a257..a33a81b3c 100644 --- a/surfsense_web/tests/auth.setup.ts +++ b/surfsense_web/tests/auth.setup.ts @@ -1,47 +1,21 @@ import path from "node:path"; import { expect, test as setup } from "@playwright/test"; +import { acquireTestToken } from "./helpers/api/auth"; /** - * One-time authentication setup. Logs in via the FastAPI backend directly - * (skipping the UI) and persists the resulting localStorage token so every - * test in the chromium project starts already authenticated. - * - * Mirrors the real auth flow in `lib/apis/auth-api.service.ts`: - * POST /auth/jwt/login -> { access_token } - * localStorage.setItem("surfsense_bearer_token", access_token) - * - * Requires a seeded test user in the dev/test DB. Configure via env: - * PLAYWRIGHT_TEST_EMAIL, PLAYWRIGHT_TEST_PASSWORD - * NEXT_PUBLIC_FASTAPI_BACKEND_URL (defaults to http://localhost:8000) + * One-time authentication setup. Acquires a bearer token for the seeded + * e2e user (rate-limit-free /__e2e__/auth/token first, /auth/jwt/login + * fallback) and persists it via localStorage so every test in the + * chromium project starts already authenticated. */ const authFile = path.join(__dirname, "..", "playwright", ".auth", "user.json"); -const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net"; -const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!"; -const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; const STORAGE_KEY = "surfsense_bearer_token"; setup("authenticate", async ({ page, request }) => { - const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, { - form: { - username: TEST_USER_EMAIL, - password: TEST_USER_PASSWORD, - grant_type: "password", - }, - headers: { "Content-Type": "application/x-www-form-urlencoded" }, - }); - - expect( - response.ok(), - `Login to ${BACKEND_URL}/auth/jwt/login failed (${response.status()}). ` + - `Check that the backend is running and that PLAYWRIGHT_TEST_EMAIL ` + - `(${TEST_USER_EMAIL}) is seeded with PLAYWRIGHT_TEST_PASSWORD. ` + - `Body: ${await response.text()}` - ).toBeTruthy(); - - const { access_token } = (await response.json()) as { access_token: string }; - expect(access_token, "Backend response missing access_token").toBeTruthy(); + const access_token = await acquireTestToken(request); + expect(access_token, "Failed to acquire e2e bearer token").toBeTruthy(); await page.addInitScript( ({ key, token }) => { diff --git a/surfsense_web/tests/documents/file-upload/journey.spec.ts b/surfsense_web/tests/documents/file-upload/journey.spec.ts index 6ddfb522f..711963bf0 100644 --- a/surfsense_web/tests/documents/file-upload/journey.spec.ts +++ b/surfsense_web/tests/documents/file-upload/journey.spec.ts @@ -107,14 +107,14 @@ test.describe("Manual file upload journey", () => { }); }); - test("user uploads a PDF (DOCUMENT branch via real Docling)", async ({ + test("user uploads a PDF (DOCUMENT branch)", async ({ page, request, apiToken, searchSpace, chatThread, }) => { - test.setTimeout(240_000); // Docling cold-start can take 30-60s on first invocation. + test.setTimeout(180_000); await uploadAndAssert({ page, diff --git a/surfsense_web/tests/fixtures/search-space.fixture.ts b/surfsense_web/tests/fixtures/search-space.fixture.ts index defde7048..62958caf4 100644 --- a/surfsense_web/tests/fixtures/search-space.fixture.ts +++ b/surfsense_web/tests/fixtures/search-space.fixture.ts @@ -1,5 +1,7 @@ +import fs from "node:fs"; +import path from "node:path"; import { test as base } from "@playwright/test"; -import { loginAsTestUser } from "../helpers/api/auth"; +import { acquireTestToken } from "../helpers/api/auth"; import { createSearchSpace, deleteSearchSpace, @@ -20,12 +22,45 @@ export type SearchSpaceFixtures = { searchSpace: SearchSpaceRow; }; +const STORAGE_KEY = "surfsense_bearer_token"; + +// Reuse the token written by tests/auth.setup.ts; on cache miss we +// mint a fresh one via /__e2e__/auth/token (rate-limit-free). +const AUTH_STATE_PATH = path.join(__dirname, "..", "..", "playwright", ".auth", "user.json"); + +function loadCachedBearerToken(): string | null { + try { + const raw = fs.readFileSync(AUTH_STATE_PATH, "utf8"); + const parsed = JSON.parse(raw) as { + origins?: Array<{ + origin?: string; + localStorage?: Array<{ name?: string; value?: string }>; + }>; + }; + for (const origin of parsed.origins ?? []) { + for (const entry of origin.localStorage ?? []) { + if (entry.name === STORAGE_KEY && entry.value) { + return entry.value; + } + } + } + } catch { + // Fall back to a fresh login. + } + return null; +} + export const searchSpaceFixtures = base.extend({ apiTokenWorker: [ async ({ playwright }, use) => { + const cached = loadCachedBearerToken(); + if (cached) { + await use(cached); + return; + } const ctx = await playwright.request.newContext(); try { - const token = await loginAsTestUser(ctx); + const token = await acquireTestToken(ctx); await use(token); } finally { await ctx.dispose(); diff --git a/surfsense_web/tests/helpers/api/auth.ts b/surfsense_web/tests/helpers/api/auth.ts index c912afedc..6492b09ba 100644 --- a/surfsense_web/tests/helpers/api/auth.ts +++ b/surfsense_web/tests/helpers/api/auth.ts @@ -11,8 +11,39 @@ import type { APIRequestContext } from "@playwright/test"; export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; -const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net"; -const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!"; +const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "e2e-test@surfsense.net"; +const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "E2eTestPassword123!"; +const E2E_MINT_SECRET = process.env.E2E_MINT_SECRET || "local-e2e-mint-secret-not-for-production"; + +/** + * Mints a JWT for the seeded e2e user via the test-only endpoint mounted + * by surfsense_backend/tests/e2e/run_backend.py. Bypasses the production + * /auth/jwt/login rate limit (5/min/IP), so it's safe to call from any + * worker / retry. Returns 404 from the backend when the endpoint isn't + * mounted (i.e. someone is pointing the suite at a non-e2e backend). + */ +export async function mintTestToken( + request: APIRequestContext, + email: string = TEST_USER_EMAIL +): Promise { + const response = await request.post(`${BACKEND_URL}/__e2e__/auth/token`, { + data: { email }, + headers: { + "Content-Type": "application/json", + "X-E2E-Mint-Secret": E2E_MINT_SECRET, + }, + }); + if (!response.ok()) { + throw new Error( + `Mint token at ${BACKEND_URL}/__e2e__/auth/token failed (${response.status()}): ${await response.text()}` + ); + } + const { access_token } = (await response.json()) as { access_token: string }; + if (!access_token) { + throw new Error("Mint response missing access_token"); + } + return access_token; +} export async function loginAsTestUser(request: APIRequestContext): Promise { const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, { @@ -37,6 +68,23 @@ export async function loginAsTestUser(request: APIRequestContext): Promise { + try { + return await mintTestToken(request); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes("(404)") || msg.includes("(405)")) { + return loginAsTestUser(request); + } + throw err; + } +} + /** * Standard auth headers for backend API calls. Optionally injects an * X-E2E-Scenario header that the test-only ScenarioMiddleware in