mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 08:46:22 +02:00
- Introduced a health check endpoint to monitor API responsiveness. - Updated SlowAPI limiter to increase default rate limits and added in-memory fallback for Redis unavailability. - Implemented a timeout for seeding Surfsense documentation to prevent startup delays.
110 lines
No EOL
4.2 KiB
Docker
110 lines
No EOL
4.2 KiB
Docker
# Base image: slim Python 3.12 (Debian-based; later steps rely on apt-get and dpkg).
FROM python:3.12-slim
|
||
|
||
# Relative paths in the COPY and RUN instructions that follow resolve against /app.
WORKDIR /app
|
||
|
||
# OS-level packages, installed in a single layer. The apt package index is
# deleted in the same layer so it never lands in the image.
#   - gcc, python3-dev: presumably for building Python C extensions (TODO confirm)
#   - ca-certificates, wget, unzip, dos2unix: used by later steps in this file
#   - curl, git, gnupg2: general tooling; not referenced elsewhere in this file
#   - espeak-ng, libsndfile1: presumably speech/audio support (TODO confirm)
#   - libgl1, libglib2.0-0, libsm6, libxext6, libxrender1: shared libraries,
#     presumably needed by imaging/OCR wheels (TODO confirm)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        dos2unix \
        espeak-ng \
        gcc \
        git \
        gnupg2 \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        libxrender1 \
        python3-dev \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
||
|
||
# Pandoc 3.x, installed from the upstream GitHub release .deb.
# Why not apt: the distribution package is 2.17, whose table rendering is broken.
# Why not rely on pypandoc_binary alone: it bundles pandoc on Windows/macOS but
# may not on Linux, where it picks up this system-wide install instead.
# The package architecture is taken from dpkg so the matching .deb is fetched;
# the downloaded file is removed in the same layer.
RUN deb_arch=$(dpkg --print-architecture) \
    && wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${deb_arch}.deb" \
    && dpkg -i /tmp/pandoc.deb \
    && rm /tmp/pandoc.deb
|
||
|
||
# Update certificates and install SSL tools.
# Refreshes the system CA bundle, then installs/upgrades the Python-side
# certificate helpers (certifi and pip-system-certs).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN update-ca-certificates && \
    pip install --no-cache-dir --upgrade certifi pip-system-certs
|
||
|
||
# Dependency manifests only. They are copied before the rest of the source so
# the dependency-install layers are rebuilt only when these two files change.
COPY pyproject.toml uv.lock ./
|
||
|
||
# PyTorch, selected by CPU architecture:
#   x86_64         -> wheels from the PyTorch cu121 index
#   anything else  -> wheels from the default package index
RUN case "$(uname -m)" in \
        x86_64) \
            pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 ;; \
        *) \
            pip install --no-cache-dir torch torchvision torchaudio ;; \
    esac
|
||
|
||
# Project dependencies: bootstrap uv with pip, then let uv install the project
# (editable) into the system interpreter.
# Neither tool keeps its download cache in the layer.
RUN pip install --no-cache-dir uv \
    && uv pip install --system --no-cache-dir -e .
|
||
|
||
# TLS trust configuration.
# The RUN step asks certifi for its CA bundle path at build time, logs it, and
# appends matching exports to root's .bashrc.
# The ENV instruction sets the same two variables, using a fixed path, for
# every process started in the image.
RUN ca_bundle=$(python -c "import certifi; print(certifi.where())") \
    && echo "Setting SSL_CERT_FILE to $ca_bundle" \
    && printf 'export SSL_CERT_FILE=%s\nexport REQUESTS_CA_BUNDLE=%s\n' "$ca_bundle" "$ca_bundle" >> /root/.bashrc
ENV SSL_CERT_FILE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem \
    REQUESTS_CA_BUNDLE=/usr/local/lib/python3.12/site-packages/certifi/cacert.pem
|
||
|
||
# Pre-download EasyOCR models to avoid runtime SSL issues.
# Best-effort: a failed download or extraction is reported on stderr but does
# not fail the build.
# TLS certificates ARE verified (the ca-certificates package is installed
# earlier in this image); the archives are unpacked into the image, so they
# must not be fetched with verification disabled.
# Everything happens in one layer so each zip can be deleted after extraction
# instead of being kept in the image.
RUN model_dir=/root/.EasyOCR/model && \
    mkdir -p "$model_dir" && \
    for model_url in \
        https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip \
        https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip \
    ; do \
        archive="$model_dir/${model_url##*/}"; \
        if wget -nv "$model_url" -O "$archive" && unzip -o "$archive" -d "$model_dir"; then \
            echo "EasyOCR: extracted ${archive##*/}"; \
        else \
            echo "WARNING: could not pre-download ${archive##*/}; continuing without it" >&2; \
        fi; \
        rm -f "$archive"; \
    done
|
||
|
||
# Pre-download Docling models.
# Written as a single-line Python program on purpose: inside a double-quoted
# sh string "\n" is NOT turned into a newline, so a multi-line program spelled
# with "\n" reaches Python as one line containing stray backslashes and fails
# with a SyntaxError before any Docling code runs.
# Best-effort: if Docling is missing or the warm-up fails, a warning is printed
# and the build continues.
# NOTE(review): whether constructing DocumentConverter alone fetches every
# model the app needs depends on the Docling version - confirm.
RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" \
    || echo "WARNING: Docling warm-up failed; continuing without pre-downloaded models" >&2
|
||
|
||
# Install Playwright browsers for web scraping if needed.
# --no-cache-dir keeps pip's download cache out of the layer.
# --with-deps makes Playwright install Chromium's OS packages through apt, so
# the apt package index it fetched is removed again in the same layer.
RUN pip install --no-cache-dir playwright && \
    playwright install chromium --with-deps && \
    rm -rf /var/lib/apt/lists/*
|
||
|
||
# Application source: the whole build context goes into the working directory.
# It is copied after the dependency layers so a source-only change does not
# rebuild them.
COPY . /app/
|
||
|
||
# Entrypoint script: copy it into place, convert it to LF line endings with
# dos2unix (a Windows checkout can leave CRLF endings), and mark it
# executable.
# NOTE(review): the preceding full-context COPY may already include this file;
# presumably the explicit copy guards against it being excluded - confirm.
COPY scripts/docker/entrypoint.sh /app/scripts/docker/
RUN dos2unix /app/scripts/docker/entrypoint.sh \
    && chmod +x /app/scripts/docker/entrypoint.sh
|
||
|
||
# Temp directory shared by the API and Worker containers for file uploads.
# Python's tempfile module honours TMPDIR, so uploaded files are written here.
# In Coolify, mount the SAME volume at /shared_tmp on both the API and the
# Worker.
ENV TMPDIR=/shared_tmp
RUN mkdir -p "$TMPDIR"
|
||
|
||
# Python runtime settings:
#   PYTHONPATH    - makes modules under /app importable
#   UVICORN_LOOP  - asyncio, to prevent uvloop compatibility issues
ENV PYTHONPATH=/app \
    UVICORN_LOOP=asyncio
|
||
|
||
# SERVICE_ROLE selects the process this container runs:
#   api     - FastAPI backend only (runs migrations on startup)
#   worker  - Celery worker only
#   beat    - Celery beat scheduler only
#   all     - all three (legacy / dev default)
ENV SERVICE_ROLE="all"
|
||
|
||
# Celery worker tuning; only used when SERVICE_ROLE is "worker" or "all".
ENV CELERY_MAX_WORKERS=10 \
    CELERY_MIN_WORKERS=2 \
    CELERY_MAX_TASKS_PER_CHILD=50
# CELERY_QUEUES is a comma-separated list of queues to consume:
#   "surfsense"             - fast tasks only (file uploads, podcasts, etc.)
#   "surfsense.connectors"  - slow connector indexing tasks only
#   ""                      - both queues (the default; for single-worker setups)
ENV CELERY_QUEUES=""
|
||
|
||
# Run
# EXPOSE documents the port range 8000-8001; it does not publish the ports.
# CMD uses exec (JSON array) form, so the array must be the only thing on the
# line: any trailing text makes Docker fall back to shell form and pass the
# brackets to /bin/sh.
EXPOSE 8000-8001
CMD ["/app/scripts/docker/entrypoint.sh"]