mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
chore: implement E2E testing setup with Docker Compose and update workflow for backend and Redis services
This commit is contained in:
parent
2c8828f60c
commit
68f45335bc
9 changed files with 433 additions and 233 deletions
|
|
@ -1,8 +1,23 @@
|
|||
FROM python:3.12-slim
|
||||
# =============================================================================
|
||||
# SurfSense Backend — Multi-stage Dockerfile
|
||||
# =============================================================================
|
||||
# Stages:
|
||||
# base — system deps + Pandoc 3.x
|
||||
# deps — Python deps frozen from uv.lock (no dev deps)
|
||||
# models — pre-baked offline assets (EasyOCR, Docling, Playwright)
|
||||
# e2e — adds tests/ via additional_contexts, swaps entrypoint
|
||||
# production — production runtime (LAST stage = default `docker build` target)
|
||||
#
|
||||
# IMPORTANT: `production` MUST remain the last stage. .github/workflows/docker-build.yml
|
||||
# builds without `target:` and BuildKit defaults to the last stage. Reordering will
|
||||
# silently break ghcr.io/modsetter/surfsense-backend.
|
||||
# =============================================================================
|
||||
|
||||
# ─── Stage 1: base (system deps, Pandoc, certificates) ──────────────────────
|
||||
FROM python:3.12-slim AS base
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies including SSL tools, CUDA dependencies, and Tesseract OCR
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
python3-dev \
|
||||
|
|
@ -22,21 +37,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
git \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Pandoc 3.x from GitHub as a fallback for Linux where pypandoc_binary
|
||||
# may not bundle pandoc (apt ships 2.17 which has broken table rendering).
|
||||
# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks this up.
|
||||
# Pandoc 3.x from GitHub Releases — apt ships 2.17 which has broken table rendering.
|
||||
# pypandoc_binary bundles pandoc on Windows/macOS; on Linux it picks up this binary.
|
||||
RUN ARCH=$(dpkg --print-architecture) && \
|
||||
wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
|
||||
dpkg -i /tmp/pandoc.deb && \
|
||||
rm /tmp/pandoc.deb
|
||||
|
||||
# Update certificates and install SSL tools
|
||||
RUN update-ca-certificates
|
||||
RUN pip install --upgrade certifi pip-system-certs
|
||||
|
||||
# Copy requirements
|
||||
COPY pyproject.toml .
|
||||
COPY uv.lock .
|
||||
ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
|
||||
ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
|
||||
|
||||
|
||||
# ─── Stage 2: deps (Python deps frozen from uv.lock) ────────────────────────
|
||||
FROM base AS deps
|
||||
|
||||
COPY pyproject.toml uv.lock ./
|
||||
|
||||
# Install all Python dependencies from uv.lock for deterministic builds.
|
||||
#
|
||||
|
|
@ -49,9 +67,7 @@ COPY uv.lock .
|
|||
# Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
|
||||
# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
|
||||
# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
|
||||
# captured in uv.lock). Installing from cu121 first only wasted ~2GB of
|
||||
# downloads that the lock-based install immediately replaced. If a specific
|
||||
# CUDA version is needed (driver compatibility, etc.), wire it through
|
||||
# captured in uv.lock). If a specific CUDA version is needed, wire it through
|
||||
# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
|
||||
RUN pip install --no-cache-dir uv && \
|
||||
uv export --frozen --no-dev --no-hashes --no-emit-project \
|
||||
|
|
@ -59,49 +75,32 @@ RUN pip install --no-cache-dir uv && \
|
|||
uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
|
||||
rm /tmp/requirements.txt
|
||||
|
||||
# Set SSL environment variables dynamically
|
||||
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
|
||||
echo "Setting SSL_CERT_FILE to $CERTIFI_PATH" && \
|
||||
echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /root/.bashrc && \
|
||||
echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /root/.bashrc
|
||||
ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
|
||||
ENV REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
|
||||
|
||||
# ─── Stage 3: models (pre-baked offline assets) ─────────────────────────────
|
||||
FROM deps AS models
|
||||
|
||||
# Pre-download EasyOCR models to avoid runtime SSL issues
|
||||
RUN mkdir -p /root/.EasyOCR/model
|
||||
RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true
|
||||
RUN wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true
|
||||
RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
|
||||
# Best-effort pre-download of the EasyOCR detection/recognition archives.
# TLS verification stays ON: the Pandoc download in the base stage already
# fetches from github.com with certificate checking enabled, so
# --no-check-certificate is unnecessary here and would only allow a tampered
# archive to be baked into the image.
# Each download/unzip is individually wrapped in `( … || true )` so a failed
# fetch does not abort the build and does not skip the remaining steps.
RUN mkdir -p /root/.EasyOCR/model && \
    (wget https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true) && \
    (wget https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true) && \
    (unzip -o /root/.EasyOCR/model/english_g2.zip -d /root/.EasyOCR/model || true) && \
    (unzip -o /root/.EasyOCR/model/craft_mlt_25k.zip -d /root/.EasyOCR/model || true)
|
||||
|
||||
# Pre-download Docling models
|
||||
# The script is deliberately a single-line statement list. /bin/sh does not
# turn "\n" inside double quotes into a newline, so a multi-line script spelled
# with literal \n reaches Python as one line containing backslashes and dies
# with a SyntaxError — which `|| true` then hides, making the step a no-op.
# Still best-effort: a failure here must not break the image build.
RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" || true
|
||||
|
||||
# Install Playwright browsers for web scraping (the playwright package itself
|
||||
# is already installed via uv.lock above)
|
||||
# Install Playwright browsers (the playwright python package itself is in deps)
|
||||
RUN playwright install chromium --with-deps
|
||||
|
||||
# Copy source code
|
||||
COPY . .
|
||||
|
||||
# Install the project itself in editable mode. Dependencies were already
|
||||
# installed deterministically from uv.lock above, so --no-deps prevents any
|
||||
# re-resolution that could pull newer versions.
|
||||
RUN uv pip install --system --no-cache-dir --no-deps -e .
|
||||
|
||||
# Copy and set permissions for entrypoint script
|
||||
# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
|
||||
COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
|
||||
RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
|
||||
|
||||
# Shared temp directory for file uploads between API and Worker containers.
|
||||
# Python's tempfile module uses TMPDIR, so uploaded files land here.
|
||||
# Mount the SAME volume at /shared_tmp on both API and Worker in Coolify.
|
||||
RUN mkdir -p /shared_tmp
|
||||
ENV TMPDIR=/shared_tmp
|
||||
|
||||
# Prevent uvloop compatibility issues
|
||||
ENV PYTHONPATH=/app
|
||||
ENV UVICORN_LOOP=asyncio
|
||||
ENV TMPDIR=/shared_tmp
|
||||
|
||||
# Tune glibc malloc to return freed memory to the OS more aggressively.
|
||||
# Without these, Python's gc.collect() frees objects but the underlying
|
||||
|
|
@ -110,6 +109,58 @@ ENV MALLOC_MMAP_THRESHOLD_=65536
|
|||
ENV MALLOC_TRIM_THRESHOLD_=131072
|
||||
ENV MALLOC_MMAP_MAX_=65536
|
||||
|
||||
|
||||
# ─── Stage 4: e2e (production source + tests/ + e2e entrypoint) ─────────────
|
||||
# Built via `docker buildx build --target e2e`. The default build target is
|
||||
# `production` (the last stage), so this stage is opt-in for CI only.
|
||||
#
|
||||
# `tests/` is excluded from the main build context by .dockerignore (so prod
|
||||
# can never accidentally ship test fakes). The e2e stage receives tests/
|
||||
# through an "additional context" passed by docker-compose.e2e.yml — see
|
||||
# https://docs.docker.com/reference/compose-file/build/#additional_contexts
|
||||
FROM models AS e2e
|
||||
|
||||
# Same source copy as production. .dockerignore filters out tests/.
|
||||
COPY . .
|
||||
|
||||
# Bring tests/ in via the named additional build context. CI passes
|
||||
# --build-context tests-source=./tests
|
||||
# (or the equivalent additional_contexts entry in docker-compose.e2e.yml).
|
||||
COPY --from=tests-source . ./tests/
|
||||
|
||||
# Install the project itself in editable mode. Dependencies were already
|
||||
# installed deterministically from uv.lock above, so --no-deps prevents any
|
||||
# re-resolution that could pull newer versions.
|
||||
RUN uv pip install --system --no-cache-dir --no-deps -e .
|
||||
|
||||
COPY scripts/docker/entrypoint.e2e.sh /app/scripts/docker/entrypoint.e2e.sh
|
||||
RUN dos2unix /app/scripts/docker/entrypoint.e2e.sh && chmod +x /app/scripts/docker/entrypoint.e2e.sh
|
||||
|
||||
# SERVICE_ROLE is overridden per service in docker-compose.e2e.yml (api / worker).
|
||||
ENV SERVICE_ROLE=api
|
||||
|
||||
EXPOSE 8000-8001
|
||||
CMD ["/app/scripts/docker/entrypoint.e2e.sh"]
|
||||
|
||||
|
||||
# ─── Stage 5: production (LAST stage — default `docker build` target) ───────
|
||||
# Behavior is byte-identical to the previous single-stage Dockerfile.
|
||||
# .github/workflows/docker-build.yml builds without `target:` and BuildKit
|
||||
# defaults to the last stage, so this MUST stay last.
|
||||
FROM models AS production
|
||||
|
||||
# Copy source code (tests/ excluded by .dockerignore — production never ships tests).
|
||||
COPY . .
|
||||
|
||||
# Install the project itself in editable mode. Dependencies were already
|
||||
# installed deterministically from uv.lock above, so --no-deps prevents any
|
||||
# re-resolution that could pull newer versions.
|
||||
RUN uv pip install --system --no-cache-dir --no-deps -e .
|
||||
|
||||
# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
|
||||
COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
|
||||
RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh
|
||||
|
||||
# SERVICE_ROLE controls which process this container runs:
|
||||
# api – FastAPI backend only (runs migrations on startup)
|
||||
# worker – Celery worker only
|
||||
|
|
@ -127,6 +178,5 @@ ENV CELERY_MAX_TASKS_PER_CHILD=50
|
|||
# "" – both queues (default, for single-worker setups)
|
||||
ENV CELERY_QUEUES=""
|
||||
|
||||
# Run
|
||||
EXPOSE 8000-8001
|
||||
CMD ["/app/scripts/docker/entrypoint.sh"]
|
||||
CMD ["/app/scripts/docker/entrypoint.sh"]
|
||||
|
|
|
|||
52
surfsense_backend/scripts/docker/entrypoint.e2e.sh
Executable file
52
surfsense_backend/scripts/docker/entrypoint.e2e.sh
Executable file
|
|
@ -0,0 +1,52 @@
|
|||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# E2E entrypoint for the multi-stage Dockerfile's `e2e` target.
|
||||
#
|
||||
# Dispatches on SERVICE_ROLE to the test-only entrypoints under tests/e2e/.
|
||||
# Those scripts apply sys.modules hijacks and LLM/embedding patches BEFORE
|
||||
# importing production app code (see tests/e2e/run_backend.py for rationale).
|
||||
#
|
||||
# Production never sees this file: tests/ is excluded from the production
|
||||
# stage, and the production stage uses scripts/docker/entrypoint.sh.
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
SERVICE_ROLE="${SERVICE_ROLE:-api}"
|
||||
echo "[e2e-entrypoint] starting role=${SERVICE_ROLE}"
|
||||
|
||||
wait_for_db() {
    # Block until the database accepts a connection, trying up to 60 times
    # with a 1-second pause between attempts. Returns 0 on the first
    # successful attempt, 1 if every attempt fails.
    #
    # Compose depends_on/healthchecks already gate on db readiness; this is
    # belt-and-suspenders so a slow first connection doesn't race migrations.
    #
    # The probe opens a real connection. Disposing a never-used engine
    # (presumably) opens no connection at all, so a dispose-only probe would
    # report "reachable" as soon as `app.db` imports.
    # NOTE(review): assumes app.db.engine is an SQLAlchemy AsyncEngine (the
    # previous probe awaited engine.dispose()) — confirm against app/db.
    local attempt
    for attempt in {1..60}; do
        if python - <<'PY' 2>/dev/null
import asyncio

from app.db import engine


async def _ping():
    # Entering the context manager forces a real connection to be opened.
    async with engine.connect():
        pass
    await engine.dispose()


asyncio.run(_ping())
PY
        then
            echo "[e2e-entrypoint] db reachable after ${attempt} attempts"
            return 0
        fi
        sleep 1
    done
    echo "[e2e-entrypoint] ERROR: db not reachable after 60s" >&2
    return 1
}
|
||||
|
||||
# Dispatch on SERVICE_ROLE. Both known roles wait for the database first;
# anything else is rejected with a non-zero exit.
if [[ "${SERVICE_ROLE}" == "api" ]]; then
    # The api role is the only one that applies migrations.
    wait_for_db
    echo "[e2e-entrypoint] running alembic upgrade head"
    alembic upgrade head
    # Replace the shell with Python so SIGTERM from `docker stop` is
    # delivered to the server process itself.
    exec python tests/e2e/run_backend.py
elif [[ "${SERVICE_ROLE}" == "worker" ]]; then
    # No migrations here (the api role owns them); still wait for the db so
    # the worker does not start against an unready Postgres on cold start.
    wait_for_db
    exec python tests/e2e/run_celery.py
else
    echo "[e2e-entrypoint] ERROR: unknown SERVICE_ROLE='${SERVICE_ROLE}' (expected: api | worker)" >&2
    exit 1
fi
|
||||
|
|
@ -57,6 +57,29 @@ sys.modules["notion_client.errors"] = _fake_notion.errors
|
|||
from dotenv import load_dotenv # noqa: E402
|
||||
|
||||
load_dotenv()
|
||||
|
||||
os.environ.setdefault(
|
||||
"DATABASE_URL",
|
||||
"postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
|
||||
)
|
||||
os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
|
||||
os.environ.setdefault("AUTH_TYPE", "LOCAL")
|
||||
os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
|
||||
os.environ.setdefault("ETL_SERVICE", "DOCLING")
|
||||
os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||
os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
|
||||
|
||||
# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
|
||||
os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
|
||||
os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
|
||||
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
|
||||
os.environ.setdefault(
|
||||
|
|
|
|||
|
|
@ -44,6 +44,29 @@ sys.modules["notion_client.errors"] = _fake_notion.errors
|
|||
from dotenv import load_dotenv # noqa: E402
|
||||
|
||||
load_dotenv()
|
||||
|
||||
os.environ.setdefault(
|
||||
"DATABASE_URL",
|
||||
"postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense",
|
||||
)
|
||||
os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("REDIS_APP_URL", "redis://localhost:6379/0")
|
||||
os.environ.setdefault("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
os.environ.setdefault("SECRET_KEY", "local-e2e-secret-not-for-production")
|
||||
os.environ.setdefault("AUTH_TYPE", "LOCAL")
|
||||
os.environ.setdefault("REGISTRATION_ENABLED", "TRUE")
|
||||
os.environ.setdefault("ETL_SERVICE", "DOCLING")
|
||||
os.environ.setdefault("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||
os.environ.setdefault("NEXT_FRONTEND_URL", "http://localhost:3000")
|
||||
|
||||
# Sentinel keys — fakes never read them; turns leaked real calls into 401s.
|
||||
os.environ.setdefault("COMPOSIO_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("COMPOSIO_ENABLED", "TRUE")
|
||||
os.environ.setdefault("OPENAI_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("ANTHROPIC_API_KEY", "local-deny-real-call-sentinel")
|
||||
os.environ.setdefault("LITELLM_API_KEY", "local-deny-real-call-sentinel")
|
||||
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_ID", "fake-atlassian-client-id")
|
||||
os.environ.setdefault("ATLASSIAN_CLIENT_SECRET", "fake-atlassian-client-secret")
|
||||
os.environ.setdefault(
|
||||
|
|
@ -198,12 +221,19 @@ def _main() -> None:
|
|||
# so Drive indexing tasks are picked up).
|
||||
queue_name = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
|
||||
queues = f"{queue_name},{queue_name}.connectors"
|
||||
|
||||
# On macOS, forking after MPS initialization crashes prefork workers; the threads pool avoids this.
|
||||
default_pool = "threads" if sys.platform == "darwin" else "prefork"
|
||||
pool = os.getenv("CELERY_POOL", default_pool)
|
||||
concurrency = os.getenv("CELERY_CONCURRENCY", "2")
|
||||
|
||||
celery_app.worker_main(
|
||||
argv=[
|
||||
"worker",
|
||||
"--loglevel=info",
|
||||
f"--queues={queues}",
|
||||
"--concurrency=2",
|
||||
f"--pool={pool}",
|
||||
f"--concurrency={concurrency}",
|
||||
"--without-gossip",
|
||||
"--without-mingle",
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue