# SurfSense All-in-One Docker Image
#
# This image bundles PostgreSQL + pgvector, Redis, Electric SQL, the backend,
# and the frontend into a single container.
#
# Usage:
#   docker run -d -p 3000:3000 -p 8000:8000 -p 5133:5133 \
#       -v surfsense-data:/data --name surfsense \
#       ghcr.io/modsetter/surfsense:latest
#
# Included services (all run locally by default):
#   - PostgreSQL 14 + pgvector (vector database)
#   - Redis (task queue)
#   - Electric SQL (real-time sync)
#   - Docling (document processing, CPU-only, OCR disabled)
#   - Kokoro TTS (local text-to-speech for podcasts)
#   - Faster-Whisper (local speech-to-text for audio files)
#   - Playwright Chromium (web scraping)
#
# Note: This is the CPU-only version. A :cuda tagged image with GPU support
# will be available in the future for faster AI inference.

# Tag of the electricsql/electric image the Electric release is copied from.
# Defaults to "latest" (the previous hard-coded value); pass
# --build-arg ELECTRIC_IMAGE_TAG=<version> for a reproducible build.
ARG ELECTRIC_IMAGE_TAG=latest

# ====================
# Stage 1: Get Electric SQL Binary
# ====================
FROM electricsql/electric:${ELECTRIC_IMAGE_TAG} AS electric-builder

# ====================
# Stage 2: Build Frontend
# ====================
FROM node:20-alpine AS frontend-builder

WORKDIR /app

# Install pnpm
RUN corepack enable pnpm

# Copy only the files needed to resolve dependencies, so this layer stays
# cached until the manifests (or docs content/config) change.
COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./
COPY surfsense_web/source.config.ts ./
COPY surfsense_web/content ./content

# Install dependencies (skip postinstall, which requires all source files)
RUN pnpm install --frozen-lockfile --ignore-scripts

# Copy the full frontend source
COPY surfsense_web/ ./

# Run the fumadocs-mdx postinstall step now that source files are available
RUN pnpm fumadocs-mdx

# Build with placeholder values that will be replaced at runtime.
# These unique strings allow runtime substitution via the entrypoint script.
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__ \
    NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__ \
    NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__ \
    NEXT_PUBLIC_ELECTRIC_URL=__NEXT_PUBLIC_ELECTRIC_URL__ \
    NEXT_PUBLIC_ELECTRIC_AUTH_MODE=__NEXT_PUBLIC_ELECTRIC_AUTH_MODE__ \
    NEXT_PUBLIC_DEPLOYMENT_MODE=__NEXT_PUBLIC_DEPLOYMENT_MODE__

# Build
RUN pnpm run build

# ====================
# Stage 3: Runtime Image
# ====================
FROM ubuntu:22.04 AS runtime

# Prevent interactive prompts during the build only. ARG (rather than ENV)
# keeps the setting visible to every RUN in this stage without baking it into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    # PostgreSQL
    postgresql-14 \
    postgresql-contrib-14 \
    # Build tools for pgvector
    build-essential \
    postgresql-server-dev-14 \
    git \
    # Redis
    redis-server \
    # Node.js prerequisites
    curl \
    ca-certificates \
    gnupg \
    # Backend dependencies
    gcc \
    wget \
    unzip \
    dos2unix \
    # For PPAs
    software-properties-common \
    # Local TTS (Kokoro) dependencies
    espeak-ng \
    libespeak-ng1 \
    # Local STT (Faster-Whisper) dependencies
    ffmpeg \
    # Audio processing (soundfile)
    libsndfile1 \
    # Image/OpenCV dependencies (for Docling)
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    # Playwright browser dependencies
    libnspr4 \
    libnss3 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libxkbcommon0 \
    libatspi2.0-0 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    libgbm1 \
    libcairo2 \
    libpango-1.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Install Pandoc 3.x from GitHub (apt ships 2.9, which has broken table rendering).
# The .deb is selected by the image architecture reported by dpkg.
ARG PANDOC_VERSION=3.9
RUN ARCH="$(dpkg --print-architecture)" \
    && wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/${PANDOC_VERSION}/pandoc-${PANDOC_VERSION}-1-${ARCH}.deb" \
    && dpkg -i /tmp/pandoc.deb \
    && rm /tmp/pandoc.deb

# Install Node.js 20.x (for running the frontend).
# The setup script is downloaded to a file first instead of being piped into
# bash: under /bin/sh (no pipefail) a failed download in `curl | bash` would be
# masked by bash exiting 0 on empty input.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/* /tmp/nodesource_setup.sh

# Install Python 3.12 from the deadsnakes PPA
RUN add-apt-repository ppa:deadsnakes/ppa -y \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3.12-dev \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.12 as the default `python` and `python3`
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Install pip for Python 3.12 (--no-cache-dir keeps the download cache out of the layer)
RUN python3.12 -m ensurepip --upgrade \
    && python3.12 -m pip install --no-cache-dir --upgrade pip

# Install supervisor via pip (system package incompatible with Python 3.12)
RUN pip install --no-cache-dir supervisor

# Build and install pgvector from the tagged release.
# A shallow clone is enough because only the tagged tree is built.
ARG PGVECTOR_VERSION=v0.7.4
RUN git clone --depth 1 --branch "${PGVECTOR_VERSION}" https://github.com/pgvector/pgvector.git /tmp/pgvector \
    && make -C /tmp/pgvector \
    && make -C /tmp/pgvector install \
    && rm -rf /tmp/pgvector

# Update certificates
RUN update-ca-certificates

# Create data directories
RUN mkdir -p /data/postgres /data/redis /data/surfsense \
    && chown -R postgres:postgres /data/postgres

# ====================
# Copy Frontend Build
# ====================
WORKDIR /app/frontend

# Copy only the standalone build (not node_modules)
COPY --from=frontend-builder /app/.next/standalone ./
COPY --from=frontend-builder /app/.next/static ./.next/static
COPY --from=frontend-builder /app/public ./public

COPY surfsense_web/content/docs /app/surfsense_web/content/docs

# ====================
# Copy Electric SQL Release
# ====================
COPY --from=electric-builder /app /app/electric-release

# ====================
# Setup Backend
# ====================
WORKDIR /app/backend

# Copy backend dependency files
COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./

# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed)
RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu

# Install Python dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv \
    && uv pip install --system --no-cache-dir -e .

# Set SSL environment variables (written to a profile.d script that points
# SSL_CERT_FILE and REQUESTS_CA_BUNDLE at certifi's CA bundle)
RUN CERTIFI_PATH="$(python -c "import certifi; print(certifi.where())")" \
    && echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \
    && echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh

# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py
# GPU support will be added in a future :cuda tagged image

# Install Playwright browsers (Chromium only; drop the bundled ffmpeg download)
RUN pip install --no-cache-dir playwright \
    && playwright install chromium \
    && rm -rf /root/.cache/ms-playwright/ffmpeg*

# Copy backend source
COPY surfsense_backend/ ./

# ====================
# Configuration
# ====================
WORKDIR /app

# Copy supervisor configuration
COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf

# Copy entrypoint script (dos2unix guards against CRLF line endings from the build host)
COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh

# PostgreSQL initialization script
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh

# Remove build-only packages from the final filesystem.
# NOTE: the layers above still contain these packages, so this step does not
# reduce the total image size; moving the pgvector build into its own stage
# would be required for that.
RUN apt-get purge -y build-essential postgresql-server-dev-14 \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Environment variables with defaults.
# NOTE(review): the database credentials below are convenience defaults for a
# local all-in-one container; override them at runtime for any shared deployment.
ENV POSTGRES_USER=surfsense \
    POSTGRES_PASSWORD=surfsense \
    POSTGRES_DB=surfsense \
    DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense \
    CELERY_BROKER_URL=redis://localhost:6379/0 \
    CELERY_RESULT_BACKEND=redis://localhost:6379/0 \
    CELERY_TASK_DEFAULT_QUEUE=surfsense \
    PYTHONPATH=/app/backend \
    NEXT_FRONTEND_URL=http://localhost:3000 \
    AUTH_TYPE=LOCAL \
    ETL_SERVICE=DOCLING \
    EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Frontend configuration (can be overridden at runtime).
# These are injected into the Next.js build at container startup.
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 \
    NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL \
    NEXT_PUBLIC_ETL_SERVICE=DOCLING

# Electric SQL configuration.
# Note: ELECTRIC_DATABASE_URL is NOT set here - the entrypoint builds it
# dynamically from ELECTRIC_DB_USER / ELECTRIC_DB_PASSWORD.
ENV ELECTRIC_DB_USER=electric \
    ELECTRIC_DB_PASSWORD=electric_password \
    ELECTRIC_INSECURE=true \
    ELECTRIC_WRITE_TO_PG_MODE=direct \
    ELECTRIC_PORT=5133 \
    PORT=5133 \
    NEXT_PUBLIC_ELECTRIC_URL=http://localhost:5133 \
    NEXT_PUBLIC_ELECTRIC_AUTH_MODE=insecure

# Data volume
VOLUME ["/data"]

# Expose ports (Frontend: 3000, Backend: 8000, Electric: 5133)
EXPOSE 3000 8000 5133

# Health check: the container is healthy once the frontend answers on port 3000
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:3000 || exit 1

# Run entrypoint
CMD ["/app/entrypoint.sh"]