SurfSense/surfsense_backend/Dockerfile
DESKTOP-RTLN3BA\$punk 338dd8d282 feat: add health check endpoint and improve rate limiting
- Introduced a health check endpoint to monitor API responsiveness.
- Updated SlowAPI limiter to increase default rate limits and added in-memory fallback for Redis unavailability.
- Implemented a timeout for seeding Surfsense documentation to prevent startup delays.
2026-02-16 23:18:29 -08:00

110 lines
No EOL
4.2 KiB
Docker
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

FROM python:3.12-slim
WORKDIR /app

# OS-level packages: C build toolchain (gcc, python3-dev), TLS/download
# utilities, audio libraries (espeak-ng, libsndfile1), GL/X11 shared libraries,
# plus dos2unix and git. Kept in alphabetical order so diffs stay small.
# The apt package lists are removed in the same layer to keep the image lean.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        dos2unix \
        espeak-ng \
        gcc \
        git \
        gnupg2 \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        libxrender1 \
        python3-dev \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Pandoc 3.x is installed from the upstream GitHub release because the apt
# package is 2.17, whose table rendering is broken. pypandoc_binary bundles
# pandoc on Windows/macOS; on Linux it may not, so it falls back to this one.
# The .deb is selected by dpkg architecture and deleted in the same layer.
RUN set -e; \
    arch="$(dpkg --print-architecture)"; \
    deb=/tmp/pandoc.deb; \
    wget -qO "$deb" "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${arch}.deb"; \
    dpkg -i "$deb"; \
    rm "$deb"
# Refresh the system CA store, then install/upgrade the Python certificate
# helpers (certifi, pip-system-certs).
# --no-cache-dir keeps pip's download cache out of the image layer, consistent
# with the other pip invocations in this Dockerfile.
RUN update-ca-certificates
RUN pip install --no-cache-dir --upgrade certifi pip-system-certs
# Dependency manifests only, copied ahead of the application source so the
# dependency layers below are rebuilt only when these two files change.
COPY pyproject.toml uv.lock ./
# PyTorch wheels depend on the CPU architecture:
#   x86_64      -> wheels from the PyTorch cu121 index
#   anything else -> default wheels from the standard package index
RUN case "$(uname -m)" in \
        x86_64) \
            pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 ;; \
        *) \
            pip install --no-cache-dir torch torchvision torchaudio ;; \
    esac
# Project dependencies: bootstrap uv with pip, then let uv install the project
# (editable) into the system interpreter. Both steps skip the package cache.
RUN set -e; \
    pip install --no-cache-dir uv; \
    uv pip install --system --no-cache-dir -e .
# Point SSL_CERT_FILE / REQUESTS_CA_BUNDLE at certifi's CA bundle.
# The RUN step resolves the bundle path at build time and appends matching
# exports to root's .bashrc. ENV cannot capture command output, so the
# image-wide values below are hard-coded and must match the Python version of
# the base image (3.12).
RUN ca_bundle=$(python -c 'import certifi; print(certifi.where())') \
    && echo "Setting SSL_CERT_FILE to $ca_bundle" \
    && printf 'export SSL_CERT_FILE=%s\nexport REQUESTS_CA_BUNDLE=%s\n' "$ca_bundle" "$ca_bundle" >> /root/.bashrc
ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem \
    REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
# Pre-download EasyOCR models at build time to avoid runtime SSL issues.
# Best effort: a failed download or extraction prints a warning but does not
# fail the build.
#  - TLS verification stays enabled (ca-certificates is installed above), so
#    a tampered download is rejected instead of being baked into the image.
#  - Archives are staged in /tmp and removed in the same layer, so neither the
#    zip files nor empty stubs from a failed download end up in the image.
RUN mkdir -p /root/.EasyOCR/model && \
    for asset in v1.3/english_g2.zip pre-v1.1.6/craft_mlt_25k.zip; do \
        archive="/tmp/$(basename "$asset")"; \
        if wget -q "https://github.com/JaidedAI/EasyOCR/releases/download/${asset}" -O "$archive" \
            && unzip -o "$archive" -d /root/.EasyOCR/model; then \
            echo "Pre-downloaded EasyOCR model ${asset}"; \
        else \
            echo "WARNING: could not pre-download EasyOCR model ${asset}; it will not be available in the image" >&2; \
        fi; \
        rm -f "$archive"; \
    done
# Pre-download Docling models (best effort: a failure must not break the build).
# The Python snippet is written as a single line on purpose: inside a
# double-quoted sh string "\n" is NOT turned into a newline, so a multi-line
# snippet spelled with "\n" reaches Python as literal backslash-n and dies with
# a SyntaxError before importing anything.
# Failures are reported on stderr instead of being silently discarded.
# NOTE(review): confirm that constructing DocumentConverter is sufficient to
# trigger the model download for the Docling version in use.
RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" \
    || echo "WARNING: Docling model pre-download failed; continuing without pre-cached models" >&2
# Install Playwright and its Chromium browser for web scraping.
# --no-cache-dir keeps pip's cache out of the layer (consistent with the other
# pip installs in this file). --with-deps pulls Chromium's OS packages through
# apt, so any apt package lists it leaves behind are removed in the same layer.
RUN pip install --no-cache-dir playwright && \
    playwright install chromium --with-deps && \
    rm -rf /var/lib/apt/lists/*
# Application source.
COPY . .

# Entrypoint script: copied explicitly, normalised to LF line endings with
# dos2unix (guards against CRLF from Windows checkouts), then made executable.
COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
RUN entrypoint=/app/scripts/docker/entrypoint.sh \
    && dos2unix "$entrypoint" \
    && chmod +x "$entrypoint"
# /shared_tmp is the hand-off directory for uploaded files between the API and
# Worker containers: Python's tempfile module honours TMPDIR, so uploads are
# written here. In Coolify, mount the SAME volume at /shared_tmp on both the
# API and the Worker.
RUN mkdir -p /shared_tmp
ENV TMPDIR=/shared_tmp

# PYTHONPATH puts /app on the import path.
# UVICORN_LOOP=asyncio avoids uvloop compatibility issues.
ENV PYTHONPATH=/app \
    UVICORN_LOOP=asyncio
# SERVICE_ROLE selects the process this container runs:
#   api    - FastAPI backend only (runs migrations on startup)
#   worker - Celery worker only
#   beat   - Celery beat scheduler only
#   all    - all three (legacy / dev default)
ENV SERVICE_ROLE=all

# Celery worker tuning; only consulted when SERVICE_ROLE is "worker" or "all".
ENV CELERY_MAX_WORKERS=10 \
    CELERY_MIN_WORKERS=2 \
    CELERY_MAX_TASKS_PER_CHILD=50

# CELERY_QUEUES is a comma-separated list of queues to consume:
#   "surfsense"            - fast tasks only (file uploads, podcasts, etc.)
#   "surfsense.connectors" - slow connector indexing tasks only
#   ""                     - both queues (default, for single-worker setups)
ENV CELERY_QUEUES=""
# Declare the container's ports (documentation only; nothing is published)
# and start the entrypoint script in exec form.
EXPOSE 8000 8001
CMD ["/app/scripts/docker/entrypoint.sh"]