From 5b0d2f82e6af7c91e51c9185d217d3fd5aea6fb8 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 20:45:20 -0800 Subject: [PATCH] try: docker all in one image --- .dockerignore | 97 ++++++++++ .github/workflows/docker-publish.yml | 75 -------- .github/workflows/docker_build.yaml | 92 ++------- Dockerfile.allinone | 180 ++++++++++++++++++ README.md | 68 ++++++- docker-compose.quickstart.yml | 82 ++++++++ scripts/docker/entrypoint-allinone.sh | 115 +++++++++++ scripts/docker/init-postgres.sh | 54 ++++++ scripts/docker/supervisor-allinone.conf | 94 +++++++++ .../content/docs/docker-installation.mdx | 130 ++++++++++++- 10 files changed, 823 insertions(+), 164 deletions(-) create mode 100644 .dockerignore delete mode 100644 .github/workflows/docker-publish.yml create mode 100644 Dockerfile.allinone create mode 100644 docker-compose.quickstart.yml create mode 100644 scripts/docker/entrypoint-allinone.sh create mode 100644 scripts/docker/init-postgres.sh create mode 100644 scripts/docker/supervisor-allinone.conf diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..207ef0d4b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,97 @@ +# Git +.git +.gitignore +.gitattributes + +# Documentation +*.md +!README.md +docs/ +CONTRIBUTING.md +CODE_OF_CONDUCT.md +LICENSE + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +.cursor/ + +# Node +**/node_modules/ +**/.next/ +**/dist/ +**/.turbo/ +**/.cache/ +**/coverage/ + +# Python +**/__pycache__/ +**/*.pyc +**/*.pyo +**/*.pyd +**/.Python +**/build/ +**/develop-eggs/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/pip-log.txt +**/.tox/ +**/.coverage +**/htmlcov/ +**/.pytest_cache/ +**/nosetests.xml +**/coverage.xml + +# Environment +**/.env +**/.env.* +!**/.env.example +**/*.local + +# Docker +**/Dockerfile +**/docker-compose*.yml +**/.docker/ + +# Testing +**/tests/ +**/test/ +**/__tests__/ 
+**/*.test.* +**/*.spec.* + +# Logs +**/*.log +**/logs/ + +# Temporary files +**/tmp/ +**/temp/ +**/.tmp/ +**/.temp/ + +# Build artifacts from backend +surfsense_backend/podcasts/ +surfsense_backend/temp_audio/ +surfsense_backend/*.bak +surfsense_backend/*.dat +surfsense_backend/*.dir + +# GitHub +.github/ + +# Browser extension (not needed for main deployment) +surfsense_browser_extension/ + diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index a391ba83c..000000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Docker Publish - -on: - workflow_dispatch: - -jobs: - # build_and_push_backend: - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - # steps: - # - name: Checkout repository - # uses: actions/checkout@v4 - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Log in to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Build and push backend image - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # file: ./surfsense_backend/Dockerfile - # push: true - # tags: ghcr.io/${{ github.repository_owner }}/surfsense_backend:${{ github.sha }} - # platforms: linux/amd64,linux/arm64 - # labels: | - # org.opencontainers.image.source=${{ github.repositoryUrl }} - # org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - # org.opencontainers.image.revision=${{ github.sha }} - - build_and_push_frontend: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker 
Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push frontend image - uses: docker/build-push-action@v5 - with: - context: ./surfsense_web - file: ./surfsense_web/Dockerfile - push: true - tags: ghcr.io/${{ github.repository_owner }}/surfsense_web:${{ github.sha }} - platforms: linux/amd64,linux/arm64 - labels: | - org.opencontainers.image.source=${{ github.repositoryUrl }} - org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - org.opencontainers.image.revision=${{ github.sha }} diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker_build.yaml index de0ecfe02..2f4d39808 100644 --- a/.github/workflows/docker_build.yaml +++ b/.github/workflows/docker_build.yaml @@ -18,39 +18,30 @@ on: default: '' permissions: - contents: write # Needed for pushing tags - packages: write # Needed for pushing docker images to GHCR + contents: write + packages: write jobs: tag_release: runs-on: ubuntu-latest outputs: - # Define output to pass the tag to the next job new_tag: ${{ steps.tag_version.outputs.next_version }} steps: - name: Checkout code uses: actions/checkout@v4 with: - # Fetch all history and tags to find the latest SemVer tag fetch-depth: 0 - # Checkout the specific branch if provided, otherwise default ref: ${{ github.event.inputs.branch }} - # Token needed to push tags back token: ${{ secrets.GITHUB_TOKEN }} - name: Get latest SemVer tag and calculate next version id: tag_version run: | - # Fetch all tags from remote just in case git fetch --tags - - # Get the latest SemVer tag (handles vX.Y.Z pattern) - # Filters tags, sorts them version-aware, takes the last one LATEST_TAG=$(git tag --list 'v[0-9]*.[0-9]*.[0-9]*' --sort='v:refname' | tail -n 1) if [ -z "$LATEST_TAG" ]; then echo "No 
previous SemVer tag found. Starting with v0.1.0" - # Determine initial version based on bump type (optional, v0.1.0 is often fine) case "${{ github.event.inputs.bump_type }}" in patch|minor) NEXT_VERSION="v0.1.0" @@ -58,22 +49,18 @@ jobs: major) NEXT_VERSION="v1.0.0" ;; - *) # Should not happen due to 'choice' input, but good practice + *) echo "Invalid bump type: ${{ github.event.inputs.bump_type }}" exit 1 ;; esac else echo "Latest tag found: $LATEST_TAG" - # Remove 'v' prefix for calculation VERSION=${LATEST_TAG#v} - - # Split into parts MAJOR=$(echo $VERSION | cut -d. -f1) MINOR=$(echo $VERSION | cut -d. -f2) PATCH=$(echo $VERSION | cut -d. -f3) - # Bump version based on input case "${{ github.event.inputs.bump_type }}" in patch) PATCH=$((PATCH + 1)) @@ -96,12 +83,10 @@ jobs: fi echo "Calculated next version: $NEXT_VERSION" - # Set output for subsequent steps echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT - name: Create and Push Tag run: | - # Configure Git user identity for annotated tag (FIX) git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' @@ -109,74 +94,23 @@ jobs: COMMIT_SHA=$(git rev-parse HEAD) echo "Tagging commit $COMMIT_SHA with $NEXT_TAG" - # Create an annotated tag (recommended) - this requires user.name/email git tag -a "$NEXT_TAG" -m "Release $NEXT_TAG" - - # Push the tag to the remote repository echo "Pushing tag $NEXT_TAG to origin" git push origin "$NEXT_TAG" - name: Verify Tag Push run: | echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..." - # Give remote a second to update sleep 5 git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1) echo "Tag successfully pushed." 
- - # build_and_push_backend_image: - # runs-on: ubuntu-latest - # needs: tag_release # Depends on the tag being created successfully - # permissions: - # packages: write # Need permission to write to GHCR - # contents: read # Need permission to read repo contents (checkout) - # steps: - # - name: Checkout code - # uses: actions/checkout@v4 - - # - name: Login to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.repository_owner }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Extract metadata (tags, labels) for Docker build - # id: meta - # uses: docker/metadata-action@v5 - # with: - # images: ghcr.io/${{ github.repository_owner }}/surfsense_backend - # tags: | - # # Use the tag generated in the previous job - # type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # # Optionally add 'latest' tag if building from the default branch - # type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - # - name: Build and push surfsense backend - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # push: true - # tags: ${{ steps.meta.outputs.tags }} - # labels: ${{ steps.meta.outputs.labels }} - # platforms: linux/amd64,linux/arm64 - # # Optional: Add build cache for faster builds - # cache-from: type=gha - # cache-to: type=gha,mode=max - - build_and_push_ui_image: + build_and_push: runs-on: ubuntu-latest - needs: tag_release # Depends on the tag being created successfully + needs: tag_release permissions: - packages: write # Need permission to write to GHCR - contents: read # Need permission to read repo contents (checkout) + packages: write + contents: read steps: - name: Checkout code @@ -195,25 
+129,23 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Extract metadata (tags, labels) for Docker build + - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v5 with: - images: ghcr.io/${{ github.repository_owner }}/surfsense_ui + images: ghcr.io/${{ github.repository_owner }}/surfsense tags: | - # Use the tag generated in the previous job type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # Optionally add 'latest' tag if building from the default branch type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - name: Build and push surfsense UI image + - name: Build and push SurfSense image uses: docker/build-push-action@v5 with: - context: ./surfsense_web + context: . + file: ./Dockerfile.allinone push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 - # Optional: Add build cache for faster builds cache-from: type=gha cache-to: type=gha,mode=max diff --git a/Dockerfile.allinone b/Dockerfile.allinone new file mode 100644 index 000000000..2fe62a86b --- /dev/null +++ b/Dockerfile.allinone @@ -0,0 +1,180 @@ +# SurfSense All-in-One Docker Image +# This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend +# Usage: docker run -d -p 3000:3000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest + +FROM ubuntu:22.04 AS base + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + # PostgreSQL dependencies + postgresql-14 \ + postgresql-contrib-14 \ + # Build tools for pgvector + build-essential \ + postgresql-server-dev-14 \ + git \ + # Redis + redis-server \ + # Python + python3.11 \ + python3.11-venv \ + python3.11-dev \ + 
python3-pip \ + # Node.js + curl \ + ca-certificates \ + gnupg \ + # Supervisor for process management + supervisor \ + # Additional dependencies for backend + gcc \ + wget \ + unzip \ + espeak-ng \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + dos2unix \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 20.x +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs \ + && npm install -g pnpm \ + && rm -rf /var/lib/apt/lists/* + +# Build and install pgvector +RUN cd /tmp \ + && git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \ + && cd pgvector \ + && make \ + && make install \ + && rm -rf /tmp/pgvector + +# Set Python 3.11 as default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 + +# Update certificates and install SSL tools +RUN update-ca-certificates + +# Create data directories +RUN mkdir -p /data/postgres /data/redis /data/surfsense \ + && chown -R postgres:postgres /data/postgres + +# ==================== +# Build Frontend +# ==================== +WORKDIR /app/frontend + +# Copy frontend source +COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ +COPY surfsense_web/source.config.ts ./ +COPY surfsense_web/content ./content + +# Install frontend dependencies +RUN pnpm install --frozen-lockfile + +# Copy rest of frontend +COPY surfsense_web/ ./ + +# Build frontend with default values (can be overridden at runtime via reverse proxy) +ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 +ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL +ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING + +ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL +ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE +ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE + +RUN pnpm run build + +# ==================== +# Setup 
Backend +# ==================== +WORKDIR /app/backend + +# Copy backend source +COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./ + +# Install PyTorch based on architecture +RUN if [ "$(uname -m)" = "x86_64" ]; then \ + pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; \ + else \ + pip install --no-cache-dir torch torchvision torchaudio; \ + fi + +# Install python dependencies +RUN pip install --no-cache-dir certifi pip-system-certs uv \ + && uv pip install --system --no-cache-dir -e . + +# Set SSL environment variables +RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \ + && echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \ + && echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh + +# Pre-download EasyOCR models (best effort; TLS verification stays enabled so a tampered archive is rejected instead of baked into the image) +RUN mkdir -p /root/.EasyOCR/model \ + && wget https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true \ + && wget https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \ + && cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true) + +# Pre-download Docling models (best effort; kept as a one-line script because sh does not turn "\n" inside double quotes into a newline) +RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" || true + +# Install Playwright browsers +RUN pip install playwright && playwright install chromium + +# Copy backend source +COPY surfsense_backend/ ./ + +# ==================== +# Configuration +# ==================== +WORKDIR /app + +# Copy supervisor configuration +COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf + +# Copy entrypoint script +COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh + +# PostgreSQL initialization script 
+COPY scripts/docker/init-postgres.sh /app/init-postgres.sh +RUN chmod +x /app/init-postgres.sh + +# Environment variables with defaults +ENV POSTGRES_USER=surfsense +ENV POSTGRES_PASSWORD=surfsense +ENV POSTGRES_DB=surfsense +ENV DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense +ENV CELERY_BROKER_URL=redis://localhost:6379/0 +ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0 +ENV PYTHONPATH=/app/backend +ENV NEXT_FRONTEND_URL=http://localhost:3000 +ENV AUTH_TYPE=LOCAL +ENV ETL_SERVICE=DOCLING +ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 + +# Data volume +VOLUME ["/data"] + +# Expose ports +# 3000 - Frontend +# 8000 - Backend API +EXPOSE 3000 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1 + +# Run entrypoint +CMD ["/app/entrypoint.sh"] + diff --git a/README.md b/README.md index f97a5813f..b717cacbe 100644 --- a/README.md +++ b/README.md @@ -150,32 +150,84 @@ Check out our public roadmap and contribute your ideas or feedback: ## How to get started? +### Quick Start with Docker 🐳 + +> [!TIP] +> For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability. 
+ +**Quick Start :** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**With Custom Embedding Model (e.g., OpenAI):** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Using Docker Compose (Recommended for easier management):** + +```bash +# Download the quick start compose file +curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml + +# Create .env file with your secret key +echo "SECRET_KEY=$(openssl rand -hex 32)" > .env + +# Start SurfSense +docker compose up -d +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: [http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + ### Installation Options -SurfSense provides three options to get started: +SurfSense provides multiple options to get started: 1. **[SurfSense Cloud](https://www.surfsense.com/login)** - The easiest way to try SurfSense without any setup. - No installation required - Instant access to all features - Perfect for getting started quickly -2. **[Docker Installation (Recommended for Self-Hosting)](https://www.surfsense.net/docs/docker-installation)** - Easy way to get SurfSense up and running with all dependencies containerized. +2. **Quick Start Docker (Above)** - Single command to get SurfSense running locally. 
+ - All-in-one image with PostgreSQL, Redis, and all services bundled + - Perfect for evaluation, development, and small deployments + - Data persisted via Docker volume + +3. **[Docker Compose (Production)](https://www.surfsense.net/docs/docker-installation)** - Full stack deployment with separate services. - Includes pgAdmin for database management through a web UI - Supports environment variable customization via `.env` file - Flexible deployment options (full stack or core services only) - - No need to manually edit configuration files between environments + - Better for production with separate scaling of services -3. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. +4. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. Docker and manual installation guides include detailed OS-specific instructions for Windows, macOS, and Linux. 
Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.net/docs/) including: -- Auth setup -- **File Processing ETL Service** (choose one): +- Auth setup (optional - defaults to LOCAL auth) +- **File Processing ETL Service** (optional - defaults to Docling): + - Docling (default, local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) - Unstructured.io API key (supports 34+ formats) - LlamaIndex API key (enhanced parsing, supports 50+ formats) - - Docling (local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) -- Other required API keys +- Other API keys as needed for your use case ## Screenshots diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml new file mode 100644 index 000000000..012388335 --- /dev/null +++ b/docker-compose.quickstart.yml @@ -0,0 +1,82 @@ +# SurfSense Quick Start Docker Compose +# +# This is a simplified docker-compose for quick local deployment using pre-built images. +# For production or customized deployments, use the main docker-compose.yml +# +# Usage: +# 1. Create a .env file with your required configuration (see below) +# 2. Run: docker compose -f docker-compose.quickstart.yml up -d +# 3. 
Access SurfSense at http://localhost:3000 +# +# Required Environment Variables: +# - SECRET_KEY: JWT secret key (generate with: openssl rand -hex 32) +# +# Optional Environment Variables: +# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2) +# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING) +# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro) +# - STT_SERVICE: Speech-to-text service (default: local/base) +# - FIRECRAWL_API_KEY: For web crawling features + +version: "3.8" + +services: + # All-in-one SurfSense container + surfsense: + image: ghcr.io/modsetter/surfsense:latest + container_name: surfsense + ports: + - "${FRONTEND_PORT:-3000}:3000" + - "${BACKEND_PORT:-8000}:8000" + volumes: + - surfsense-data:/data + environment: + # Required - compose aborts with this message when SECRET_KEY is unset or empty, so JWTs are never signed with a publicly known default + - "SECRET_KEY=${SECRET_KEY:?SECRET_KEY must be set, e.g. via openssl rand -hex 32}" + + # Auth Configuration + - AUTH_TYPE=${AUTH_TYPE:-LOCAL} + - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} + - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} + + # AI/ML Configuration + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE} + - RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-} + - RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-} + + # Document Processing + - ETL_SERVICE=${ETL_SERVICE:-DOCLING} + - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} + - LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-} + + # Audio Services + - TTS_SERVICE=${TTS_SERVICE:-local/kokoro} + - TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-} + - STT_SERVICE=${STT_SERVICE:-local/base} + - STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-} + + # Web Crawling + - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-} + + # Optional Features + - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE} + - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m} + + # LangSmith Observability (optional) + - 
LANGSMITH_TRACING=${LANGSMITH_TRACING:-false} + - LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-} + - LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-} + - LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-} + restart: unless-stopped + healthcheck:  # CMD-SHELL form - "&&" needs a shell; exec-form CMD would pass it to curl as another URL + test: ["CMD-SHELL", "curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 120s + +volumes: + surfsense-data: + name: surfsense-data + diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh new file mode 100644 index 000000000..0df2555c2 --- /dev/null +++ b/scripts/docker/entrypoint-allinone.sh @@ -0,0 +1,115 @@ +#!/bin/bash +set -e + +echo "===========================================" +echo " 🏄 SurfSense All-in-One Container" +echo "===========================================" + +# Create log directory +mkdir -p /var/log/supervisor + +# ================================================ +# Initialize PostgreSQL if needed +# ================================================ +if [ ! -f /data/postgres/PG_VERSION ]; then + echo "📦 Initializing PostgreSQL database..." 
+ + # Initialize PostgreSQL data directory + chown -R postgres:postgres /data/postgres + chmod 700 /data/postgres + + su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres" + + # Configure PostgreSQL for connections + echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf + echo "local all all trust" >> /data/postgres/pg_hba.conf + echo "listen_addresses='*'" >> /data/postgres/postgresql.conf + + # Start PostgreSQL temporarily to create database and user + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_init.log start" + + # Wait for PostgreSQL to be ready + sleep 5 + + # Create user and database + su - postgres -c "psql -c \"CREATE USER ${POSTGRES_USER:-surfsense} WITH PASSWORD '${POSTGRES_PASSWORD:-surfsense}' SUPERUSER;\"" + su - postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB:-surfsense} OWNER ${POSTGRES_USER:-surfsense};\"" + + # Enable pgvector extension + su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} -c 'CREATE EXTENSION IF NOT EXISTS vector;'" + + # Stop temporary PostgreSQL + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" + + echo "✅ PostgreSQL initialized successfully" +else + echo "✅ PostgreSQL data directory already exists" +fi + +# ================================================ +# Initialize Redis data directory +# ================================================ +mkdir -p /data/redis +chmod 755 /data/redis +echo "✅ Redis data directory ready" + +# ================================================ +# Copy frontend build to runtime location +# ================================================ +if [ -d /app/frontend/.next/standalone ]; then + cp -r /app/frontend/.next/standalone/* /app/frontend/ 2>/dev/null || true + cp -r /app/frontend/.next/static /app/frontend/.next/static 2>/dev/null || true +fi + +# ================================================ +# Run database migrations +# ================================================ +run_migrations() { + 
echo "🔄 Running database migrations..." + + # Start PostgreSQL temporarily for migrations + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_migrate.log start" + sleep 5 + + # Start Redis temporarily for migrations (some might need it) + redis-server --dir /data/redis --daemonize yes + sleep 2 + + # Run alembic migrations; the marker file is written only when alembic exits 0, so a failed run is retried on the next start + cd /app/backend + if alembic upgrade head; then touch /data/.migrations_run; else echo "⚠️ Database migrations failed; they will be retried on the next start" >&2; fi + + # Stop temporary services + redis-cli shutdown || true + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" + + echo "✅ Database migration step finished" +} + +# Run migrations on first start, after an earlier failed attempt, or when explicitly requested +if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then + rm -f /data/.migrations_run + run_migrations +fi + +# ================================================ +# Environment Variables Info +# ================================================ +echo "" +echo "===========================================" +echo " 📋 Configuration" +echo "===========================================" +echo " Frontend URL: http://localhost:3000" +echo " Backend API: http://localhost:8000" +echo " API Docs: http://localhost:8000/docs" +echo " Auth Type: ${AUTH_TYPE:-LOCAL}" +echo " ETL Service: ${ETL_SERVICE:-DOCLING}" +echo "===========================================" +echo "" + +# ================================================ +# Start Supervisor (manages all services) +# ================================================ +echo "🚀 Starting all services..." 
+exec /usr/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf + diff --git a/scripts/docker/init-postgres.sh b/scripts/docker/init-postgres.sh new file mode 100644 index 000000000..3d2a15f46 --- /dev/null +++ b/scripts/docker/init-postgres.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# PostgreSQL initialization script for SurfSense +# This script is called during container startup if the database needs initialization + +set -e + +PGDATA=${PGDATA:-/data/postgres} +POSTGRES_USER=${POSTGRES_USER:-surfsense} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-surfsense} +POSTGRES_DB=${POSTGRES_DB:-surfsense} + +echo "Initializing PostgreSQL..." + +# Check if PostgreSQL is already initialized +if [ -f "$PGDATA/PG_VERSION" ]; then + echo "PostgreSQL data directory already exists. Skipping initialization." + exit 0 +fi + +# Initialize the database cluster +/usr/lib/postgresql/14/bin/initdb -D "$PGDATA" --username=postgres + +# Configure PostgreSQL +cat >> "$PGDATA/postgresql.conf" << EOF +listen_addresses = '*' +max_connections = 100 +shared_buffers = 128MB +EOF + +cat >> "$PGDATA/pg_hba.conf" << EOF +# Allow connections from anywhere with password +host all all 0.0.0.0/0 md5 +host all all ::0/0 md5 +EOF + +# Start PostgreSQL temporarily +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" -l /tmp/postgres_init.log start + +# Wait for PostgreSQL to start +sleep 3 + +# Create user and database +psql -U postgres << EOF +CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER; +CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER; +\c $POSTGRES_DB +CREATE EXTENSION IF NOT EXISTS vector; +EOF + +echo "PostgreSQL initialized successfully." 
+ +# Stop PostgreSQL (supervisor will start it) +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" stop + diff --git a/scripts/docker/supervisor-allinone.conf b/scripts/docker/supervisor-allinone.conf new file mode 100644 index 000000000..15685592a --- /dev/null +++ b/scripts/docker/supervisor-allinone.conf @@ -0,0 +1,94 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor +user=root + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +# PostgreSQL +[program:postgresql] +command=/usr/lib/postgresql/14/bin/postgres -D /data/postgres +user=postgres +autostart=true +autorestart=true +priority=10 +stdout_logfile=/var/log/supervisor/postgresql.log +stderr_logfile=/var/log/supervisor/postgresql-error.log +environment=PGDATA="/data/postgres" + +# Redis +[program:redis] +command=/usr/bin/redis-server --dir /data/redis --appendonly yes +autostart=true +autorestart=true +priority=20 +stdout_logfile=/var/log/supervisor/redis.log +stderr_logfile=/var/log/supervisor/redis-error.log + +# Backend API +[program:backend] +command=python main.py +directory=/app/backend +autostart=true +autorestart=true +priority=30 +startsecs=10 +startretries=3 +stdout_logfile=/var/log/supervisor/backend.log +stderr_logfile=/var/log/supervisor/backend-error.log +environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1" + +# Celery Worker +[program:celery-worker] +command=celery -A app.celery_app worker --loglevel=info --concurrency=2 --pool=solo +directory=/app/backend +autostart=true +autorestart=true +priority=40 +startsecs=15 +startretries=3 +stdout_logfile=/var/log/supervisor/celery-worker.log +stderr_logfile=/var/log/supervisor/celery-worker-error.log 
+environment=PYTHONPATH="/app/backend" + +# Celery Beat (scheduler) +[program:celery-beat] +command=celery -A app.celery_app beat --loglevel=info +directory=/app/backend +autostart=true +autorestart=true +priority=50 +startsecs=20 +startretries=3 +stdout_logfile=/var/log/supervisor/celery-beat.log +stderr_logfile=/var/log/supervisor/celery-beat-error.log +environment=PYTHONPATH="/app/backend" + +# Frontend +[program:frontend] +command=node server.js +directory=/app/frontend +autostart=true +autorestart=true +priority=60 +startsecs=5 +startretries=3 +stdout_logfile=/var/log/supervisor/frontend.log +stderr_logfile=/var/log/supervisor/frontend-error.log +environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0" + +# Process Groups +[group:surfsense] +programs=postgresql,redis,backend,celery-worker,celery-beat,frontend +priority=999 + diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index 46ef4128b..e4ae03e92 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -8,7 +8,135 @@ full: true # Docker Installation -This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment. +This guide explains how to run SurfSense using Docker, with options ranging from quick single-command deployment to full production setups. + +## Quick Start with Docker 🐳 + +Get SurfSense running in seconds with a single command: + + +The all-in-one Docker image bundles PostgreSQL (with pgvector), Redis, and all SurfSense services. Perfect for quick evaluation and development. + + + +Make sure to include the `-v surfsense-data:/data` in your Docker command. This ensures your database and files are properly persisted. 
+ + +### One-Line Installation + +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +$secretKey = -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | ForEach-Object {[char]$_}) +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + -e SECRET_KEY=$secretKey ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +### With Custom Configuration + +**Using OpenAI Embeddings:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**With Google OAuth:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e AUTH_TYPE=GOOGLE \ + -e GOOGLE_OAUTH_CLIENT_ID=your_client_id \ + -e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +### Quick Start with Docker Compose + +For easier management with environment files: + +```bash +# Download the quick start compose file +curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml + +# Create .env file +cat > .env << EOF +SECRET_KEY=$(openssl rand -hex 32) +# Add other configuration as needed +# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +# ETL_SERVICE=DOCLING +EOF + +# Start SurfSense +docker compose up -d +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: 
[http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + +### Quick Start Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| SECRET_KEY | JWT secret key (required) | - | +| AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL | +| EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 | +| ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING | +| TTS_SERVICE | Text-to-speech for podcasts | local/kokoro | +| STT_SERVICE | Speech-to-text for audio | local/base | +| REGISTRATION_ENABLED | Allow new user registration | TRUE | + +### Useful Commands + +```bash +# View logs +docker logs -f surfsense + +# Stop SurfSense +docker stop surfsense + +# Start SurfSense +docker start surfsense + +# Remove container (stop it first; data is preserved in the volume) +docker rm surfsense + +# Remove container AND data (stop it first; this permanently deletes the database and files) +docker rm surfsense && docker volume rm surfsense-data +``` + +--- + +## Full Docker Compose Setup (Production) + +For production deployments with separate services and more control, use the full Docker Compose setup below. ## Prerequisites