mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
- Modified Dockerfile to use placeholder values for frontend environment variables, allowing for runtime substitution. - Enhanced entrypoint script to apply runtime environment variable configuration, replacing placeholders in JavaScript files with actual values. - Updated documentation paths in MDX files for Google OAuth images and added detailed setup guides for Discord, Linear, Notion, and Slack OAuth integrations.
250 lines
7.5 KiB
Text
250 lines
7.5 KiB
Text
# SurfSense All-in-One Docker Image
# This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend
# Usage: docker run -d -p 3000:3000 -p 8000:8000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest
#
# Included Services (all run locally by default):
# - PostgreSQL 14 + pgvector (vector database)
# - Redis (task queue)
# - Docling (document processing, CPU-only, OCR disabled)
# - Kokoro TTS (local text-to-speech for podcasts)
# - Faster-Whisper (local speech-to-text for audio files)
# - Playwright Chromium (web scraping)
#
# Note: This is the CPU-only version. A :cuda tagged image with GPU support
# will be available in the future for faster AI inference.

# ------------------------------------------------------------
# Stage 1 - frontend-builder: installs the web client's packages
# ------------------------------------------------------------
FROM node:20-alpine AS frontend-builder
WORKDIR /app

# Activate the pnpm shim through corepack.
RUN corepack enable pnpm

# Bring in the package manifests, the source config and the content
# directory ahead of the rest of the source tree, so the dependency layer
# below is only rebuilt when one of these inputs changes.
COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* surfsense_web/source.config.ts /app/
COPY surfsense_web/content /app/content

# Resolve dependencies strictly from the lockfile. Lifecycle scripts are
# skipped here because the postinstall step needs the full source tree.
RUN pnpm install --frozen-lockfile --ignore-scripts

# Add the remainder of the web application source.
COPY surfsense_web/ /app/

# The postinstall step that was skipped during dependency installation:
# it can run now that all source files are present.
RUN pnpm fumadocs-mdx

# Compile with sentinel strings instead of real settings. Each value is a
# unique placeholder so the entrypoint script can substitute the actual
# configuration at runtime.
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__ \
    NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__ \
    NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__

# Produce the production build.
RUN pnpm run build

# ====================
# Stage 2: Runtime Image
# ====================
FROM ubuntu:22.04 AS runtime

# Prevent interactive prompts while packages are installed.
# Declared with ARG instead of ENV: the value is still exported to every RUN
# instruction in this stage, but it is not baked into the environment of the
# running container, where it would silently change apt/debconf behaviour.
ARG DEBIAN_FRONTEND=noninteractive

# System packages for every bundled service, installed in one layer.
# The apt index is refreshed and deleted within the same RUN so no package
# lists are left behind in the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        # --- PostgreSQL server ---
        postgresql-14 \
        postgresql-contrib-14 \
        # --- Toolchain used to compile pgvector ---
        build-essential \
        git \
        postgresql-server-dev-14 \
        # --- Redis ---
        redis-server \
        # --- Prerequisites for adding the Node.js repository ---
        ca-certificates \
        curl \
        gnupg \
        # --- Backend dependencies ---
        dos2unix \
        gcc \
        unzip \
        wget \
        # --- Needed to add PPAs ---
        software-properties-common \
        # --- Local TTS (Kokoro) ---
        espeak-ng \
        libespeak-ng1 \
        # --- Local STT (Faster-Whisper) ---
        ffmpeg \
        # --- Audio processing (soundfile) ---
        libsndfile1 \
        # --- Image/OpenCV libraries (for Docling) ---
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        # --- Playwright browser libraries ---
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libgbm1 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libxcomposite1 \
        libxdamage1 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js 20.x (for running frontend).
# The NodeSource setup script is written to disk and then executed instead of
# being piped into bash: RUN uses /bin/sh without pipefail, so in
# `curl ... | bash -` a failed or truncated download would go unnoticed
# because only the exit status of the last pipeline command is checked.
# --no-install-recommends matches the other apt installs in this file.
# The setup script and the apt lists are removed in the same layer.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/* /tmp/nodesource_setup.sh

# Python 3.12 interpreter, venv module and headers, taken from the
# deadsnakes PPA. The apt lists are dropped again in the same layer.
RUN add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
    && rm -rf /var/lib/apt/lists/*

# Point both `python3` and `python` at the 3.12 interpreter.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1

# Bootstrap pip for Python 3.12, then bring it up to date.
RUN python3.12 -m ensurepip --upgrade \
    && python3.12 -m pip install --upgrade pip

# supervisor comes from pip because the system package is incompatible
# with Python 3.12.
RUN pip install --no-cache-dir supervisor

# Build and install pgvector v0.7.4 from source.
# - OPTFLAGS="" turns off pgvector's default -march=native, which would tie
#   the compiled extension to the CPU of the build machine and can cause
#   "Illegal instruction" crashes when the image runs on different hardware.
# - --depth 1 fetches only the tagged commit; history is not needed to build.
# The checkout is deleted in the same layer so it never reaches the image.
RUN cd /tmp \
    && git clone --depth 1 --branch v0.7.4 https://github.com/pgvector/pgvector.git \
    && cd pgvector \
    && make OPTFLAGS="" \
    && make install \
    && rm -rf /tmp/pgvector

# Regenerate the system CA certificate bundle.
RUN update-ca-certificates

# Lay out the per-service data directories under /data and give the
# PostgreSQL directory to the postgres user.
RUN mkdir -p \
        /data/postgres \
        /data/redis \
        /data/surfsense \
    && chown -R postgres:postgres /data/postgres

# ------------------------------------------------------------
# Frontend artifacts from the frontend-builder stage
# ------------------------------------------------------------
WORKDIR /app/frontend

# Only the standalone output, its static assets and the public directory are
# taken from the builder; node_modules is not copied.
COPY --from=frontend-builder /app/.next/standalone /app/frontend/
COPY --from=frontend-builder /app/.next/static /app/frontend/.next/static
COPY --from=frontend-builder /app/public /app/frontend/public

# ------------------------------------------------------------
# Backend: Python dependencies
# ------------------------------------------------------------
WORKDIR /app/backend

# Only the dependency manifests are needed for the install steps below.
COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock /app/backend/

# CPU-only PyTorch wheels. Docling requires torch, but OCR is disabled and
# no GPU is used.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
        torch \
        torchvision

# Certificate helpers and uv first, then the project's own dependencies
# installed into the system interpreter.
RUN pip install --no-cache-dir \
        certifi \
        pip-system-certs \
        uv \
    && uv pip install --system --no-cache-dir -e .

# Record certifi's CA bundle location as SSL_CERT_FILE and REQUESTS_CA_BUNDLE
# by appending two export lines to /etc/profile.d/ssl.sh.
# NOTE(review): files in /etc/profile.d are normally read by login shells
# only - confirm that the entrypoint or supervisor config sources this file.
RUN ca_bundle=$(python -c "import certifi; print(certifi.where())") \
    && printf 'export SSL_CERT_FILE=%s\nexport REQUESTS_CA_BUNDLE=%s\n' \
        "$ca_bundle" "$ca_bundle" >> /etc/profile.d/ssl.sh

# EasyOCR models are intentionally NOT downloaded: OCR is disabled in
# docling_service.py. GPU support is planned for a future :cuda tagged image.

# Playwright plus its Chromium build. The ffmpeg payload that lands in the
# Playwright cache is deleted in the same layer.
RUN pip install --no-cache-dir playwright \
    && playwright install chromium \
    && rm -rf /root/.cache/ms-playwright/ffmpeg*

# Backend application source.
COPY surfsense_backend/ /app/backend/

# ------------------------------------------------------------
# Process manager configuration and startup scripts
# ------------------------------------------------------------
WORKDIR /app

# supervisor program definitions
COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf

# Container entrypoint and PostgreSQL initialization script
COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh

# Convert both scripts to Unix line endings and mark them executable.
RUN dos2unix /app/entrypoint.sh /app/init-postgres.sh \
    && chmod +x /app/entrypoint.sh /app/init-postgres.sh

# Remove the packages that were only needed to compile pgvector, along with
# anything they pulled in automatically, and clear temp directories.
# NOTE(review): this runs in its own layer, so the packages disappear from the
# final filesystem but the layers that installed them still carry their bytes.
RUN apt-get purge -y \
        build-essential \
        postgresql-server-dev-14 \
        git \
    && apt-get autoremove -y \
    && rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/*

# Default service settings; every value can be overridden with `docker run -e`.
# Database
ENV POSTGRES_USER=surfsense \
    POSTGRES_PASSWORD=surfsense \
    POSTGRES_DB=surfsense \
    DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense

# Task queue
ENV CELERY_BROKER_URL=redis://localhost:6379/0 \
    CELERY_RESULT_BACKEND=redis://localhost:6379/0

# Backend application
ENV PYTHONPATH=/app/backend \
    NEXT_FRONTEND_URL=http://localhost:3000 \
    AUTH_TYPE=LOCAL \
    ETL_SERVICE=DOCLING \
    EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Frontend settings (overridable at runtime). At container startup these are
# injected into the Next.js build.
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 \
    NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL \
    NEXT_PUBLIC_ETL_SERVICE=DOCLING

# Persistent state lives under /data.
VOLUME ["/data"]

# Ports this image documents: 3000 and 8000.
EXPOSE 3000
EXPOSE 8000

# Probe port 3000 over HTTP every 30s, allowing two minutes for startup
# before failures count.
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:3000 || exit 1

# Start the container through the entrypoint script.
CMD ["/app/entrypoint.sh"]