From 5b0d2f82e6af7c91e51c9185d217d3fd5aea6fb8 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 20:45:20 -0800 Subject: [PATCH 01/10] try: docker all in one image --- .dockerignore | 97 ++++++++++ .github/workflows/docker-publish.yml | 75 -------- .github/workflows/docker_build.yaml | 92 ++------- Dockerfile.allinone | 180 ++++++++++++++++++ README.md | 68 ++++++- docker-compose.quickstart.yml | 82 ++++++++ scripts/docker/entrypoint-allinone.sh | 115 +++++++++++ scripts/docker/init-postgres.sh | 54 ++++++ scripts/docker/supervisor-allinone.conf | 94 +++++++++ .../content/docs/docker-installation.mdx | 130 ++++++++++++- 10 files changed, 823 insertions(+), 164 deletions(-) create mode 100644 .dockerignore delete mode 100644 .github/workflows/docker-publish.yml create mode 100644 Dockerfile.allinone create mode 100644 docker-compose.quickstart.yml create mode 100644 scripts/docker/entrypoint-allinone.sh create mode 100644 scripts/docker/init-postgres.sh create mode 100644 scripts/docker/supervisor-allinone.conf diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..207ef0d4b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,97 @@ +# Git +.git +.gitignore +.gitattributes + +# Documentation +*.md +!README.md +docs/ +CONTRIBUTING.md +CODE_OF_CONDUCT.md +LICENSE + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +.cursor/ + +# Node +**/node_modules/ +**/.next/ +**/dist/ +**/.turbo/ +**/.cache/ +**/coverage/ + +# Python +**/__pycache__/ +**/*.pyc +**/*.pyo +**/*.pyd +**/.Python +**/build/ +**/develop-eggs/ +**/downloads/ +**/eggs/ +**/.eggs/ +**/lib/ +**/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/pip-log.txt +**/.tox/ +**/.coverage +**/htmlcov/ +**/.pytest_cache/ +**/nosetests.xml +**/coverage.xml + +# Environment +**/.env +**/.env.* +!**/.env.example +**/*.local + +# Docker +**/Dockerfile +**/docker-compose*.yml +**/.docker/ + +# Testing +**/tests/ +**/test/ +**/__tests__/ 
+**/*.test.* +**/*.spec.* + +# Logs +**/*.log +**/logs/ + +# Temporary files +**/tmp/ +**/temp/ +**/.tmp/ +**/.temp/ + +# Build artifacts from backend +surfsense_backend/podcasts/ +surfsense_backend/temp_audio/ +surfsense_backend/*.bak +surfsense_backend/*.dat +surfsense_backend/*.dir + +# GitHub +.github/ + +# Browser extension (not needed for main deployment) +surfsense_browser_extension/ + diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index a391ba83c..000000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Docker Publish - -on: - workflow_dispatch: - -jobs: - # build_and_push_backend: - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - # steps: - # - name: Checkout repository - # uses: actions/checkout@v4 - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Log in to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Build and push backend image - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # file: ./surfsense_backend/Dockerfile - # push: true - # tags: ghcr.io/${{ github.repository_owner }}/surfsense_backend:${{ github.sha }} - # platforms: linux/amd64,linux/arm64 - # labels: | - # org.opencontainers.image.source=${{ github.repositoryUrl }} - # org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - # org.opencontainers.image.revision=${{ github.sha }} - - build_and_push_frontend: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker 
Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push frontend image - uses: docker/build-push-action@v5 - with: - context: ./surfsense_web - file: ./surfsense_web/Dockerfile - push: true - tags: ghcr.io/${{ github.repository_owner }}/surfsense_web:${{ github.sha }} - platforms: linux/amd64,linux/arm64 - labels: | - org.opencontainers.image.source=${{ github.repositoryUrl }} - org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - org.opencontainers.image.revision=${{ github.sha }} diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker_build.yaml index de0ecfe02..2f4d39808 100644 --- a/.github/workflows/docker_build.yaml +++ b/.github/workflows/docker_build.yaml @@ -18,39 +18,30 @@ on: default: '' permissions: - contents: write # Needed for pushing tags - packages: write # Needed for pushing docker images to GHCR + contents: write + packages: write jobs: tag_release: runs-on: ubuntu-latest outputs: - # Define output to pass the tag to the next job new_tag: ${{ steps.tag_version.outputs.next_version }} steps: - name: Checkout code uses: actions/checkout@v4 with: - # Fetch all history and tags to find the latest SemVer tag fetch-depth: 0 - # Checkout the specific branch if provided, otherwise default ref: ${{ github.event.inputs.branch }} - # Token needed to push tags back token: ${{ secrets.GITHUB_TOKEN }} - name: Get latest SemVer tag and calculate next version id: tag_version run: | - # Fetch all tags from remote just in case git fetch --tags - - # Get the latest SemVer tag (handles vX.Y.Z pattern) - # Filters tags, sorts them version-aware, takes the last one LATEST_TAG=$(git tag --list 'v[0-9]*.[0-9]*.[0-9]*' --sort='v:refname' | tail -n 1) if [ -z "$LATEST_TAG" ]; then echo "No 
previous SemVer tag found. Starting with v0.1.0" - # Determine initial version based on bump type (optional, v0.1.0 is often fine) case "${{ github.event.inputs.bump_type }}" in patch|minor) NEXT_VERSION="v0.1.0" @@ -58,22 +49,18 @@ jobs: major) NEXT_VERSION="v1.0.0" ;; - *) # Should not happen due to 'choice' input, but good practice + *) echo "Invalid bump type: ${{ github.event.inputs.bump_type }}" exit 1 ;; esac else echo "Latest tag found: $LATEST_TAG" - # Remove 'v' prefix for calculation VERSION=${LATEST_TAG#v} - - # Split into parts MAJOR=$(echo $VERSION | cut -d. -f1) MINOR=$(echo $VERSION | cut -d. -f2) PATCH=$(echo $VERSION | cut -d. -f3) - # Bump version based on input case "${{ github.event.inputs.bump_type }}" in patch) PATCH=$((PATCH + 1)) @@ -96,12 +83,10 @@ jobs: fi echo "Calculated next version: $NEXT_VERSION" - # Set output for subsequent steps echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT - name: Create and Push Tag run: | - # Configure Git user identity for annotated tag (FIX) git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' @@ -109,74 +94,23 @@ jobs: COMMIT_SHA=$(git rev-parse HEAD) echo "Tagging commit $COMMIT_SHA with $NEXT_TAG" - # Create an annotated tag (recommended) - this requires user.name/email git tag -a "$NEXT_TAG" -m "Release $NEXT_TAG" - - # Push the tag to the remote repository echo "Pushing tag $NEXT_TAG to origin" git push origin "$NEXT_TAG" - name: Verify Tag Push run: | echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..." - # Give remote a second to update sleep 5 git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1) echo "Tag successfully pushed." 
- - # build_and_push_backend_image: - # runs-on: ubuntu-latest - # needs: tag_release # Depends on the tag being created successfully - # permissions: - # packages: write # Need permission to write to GHCR - # contents: read # Need permission to read repo contents (checkout) - # steps: - # - name: Checkout code - # uses: actions/checkout@v4 - - # - name: Login to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.repository_owner }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Extract metadata (tags, labels) for Docker build - # id: meta - # uses: docker/metadata-action@v5 - # with: - # images: ghcr.io/${{ github.repository_owner }}/surfsense_backend - # tags: | - # # Use the tag generated in the previous job - # type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # # Optionally add 'latest' tag if building from the default branch - # type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - # - name: Build and push surfsense backend - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # push: true - # tags: ${{ steps.meta.outputs.tags }} - # labels: ${{ steps.meta.outputs.labels }} - # platforms: linux/amd64,linux/arm64 - # # Optional: Add build cache for faster builds - # cache-from: type=gha - # cache-to: type=gha,mode=max - - build_and_push_ui_image: + build_and_push: runs-on: ubuntu-latest - needs: tag_release # Depends on the tag being created successfully + needs: tag_release permissions: - packages: write # Need permission to write to GHCR - contents: read # Need permission to read repo contents (checkout) + packages: write + contents: read steps: - name: Checkout code @@ -195,25 
+129,23 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Extract metadata (tags, labels) for Docker build + - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v5 with: - images: ghcr.io/${{ github.repository_owner }}/surfsense_ui + images: ghcr.io/${{ github.repository_owner }}/surfsense tags: | - # Use the tag generated in the previous job type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # Optionally add 'latest' tag if building from the default branch type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - name: Build and push surfsense UI image + - name: Build and push SurfSense image uses: docker/build-push-action@v5 with: - context: ./surfsense_web + context: . + file: ./Dockerfile.allinone push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 - # Optional: Add build cache for faster builds cache-from: type=gha cache-to: type=gha,mode=max diff --git a/Dockerfile.allinone b/Dockerfile.allinone new file mode 100644 index 000000000..2fe62a86b --- /dev/null +++ b/Dockerfile.allinone @@ -0,0 +1,180 @@ +# SurfSense All-in-One Docker Image +# This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend +# Usage: docker run -d -p 3000:3000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest + +FROM ubuntu:22.04 AS base + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + # PostgreSQL dependencies + postgresql-14 \ + postgresql-contrib-14 \ + # Build tools for pgvector + build-essential \ + postgresql-server-dev-14 \ + git \ + # Redis + redis-server \ + # Python + python3.11 \ + python3.11-venv \ + python3.11-dev \ + 
python3-pip \ + # Node.js + curl \ + ca-certificates \ + gnupg \ + # Supervisor for process management + supervisor \ + # Additional dependencies for backend + gcc \ + wget \ + unzip \ + espeak-ng \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + dos2unix \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 20.x +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs \ + && npm install -g pnpm \ + && rm -rf /var/lib/apt/lists/* + +# Build and install pgvector +RUN cd /tmp \ + && git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \ + && cd pgvector \ + && make \ + && make install \ + && rm -rf /tmp/pgvector + +# Set Python 3.11 as default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 + +# Update certificates and install SSL tools +RUN update-ca-certificates + +# Create data directories +RUN mkdir -p /data/postgres /data/redis /data/surfsense \ + && chown -R postgres:postgres /data/postgres + +# ==================== +# Build Frontend +# ==================== +WORKDIR /app/frontend + +# Copy frontend source +COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ +COPY surfsense_web/source.config.ts ./ +COPY surfsense_web/content ./content + +# Install frontend dependencies +RUN pnpm install --frozen-lockfile + +# Copy rest of frontend +COPY surfsense_web/ ./ + +# Build frontend with default values (can be overridden at runtime via reverse proxy) +ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 +ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL +ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING + +ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL +ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE +ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE + +RUN pnpm run build + +# ==================== +# Setup 
Backend
+# ====================
+WORKDIR /app/backend
+
+# Copy only the dependency manifests first so the install layers below can be cached
+COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./
+
+# Install PyTorch based on architecture (CPU wheel index on x86_64, default index elsewhere)
+RUN if [ "$(uname -m)" = "x86_64" ]; then \
+        pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; \
+    else \
+        pip install --no-cache-dir torch torchvision torchaudio; \
+    fi
+
+# Install python dependencies
+RUN pip install --no-cache-dir certifi pip-system-certs uv \
+    && uv pip install --system --no-cache-dir -e .
+
+# Record the certifi bundle path for shells that source /etc/profile.d
+RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
+    && echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \
+    && echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh
+
+# Pre-download EasyOCR models (best effort; TLS verification is left enabled for the model archives)
+RUN mkdir -p /root/.EasyOCR/model \
+    && wget https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true \
+    && wget https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \
+    && cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
+
+# Pre-download Docling models (best effort). One-line form: sh leaves \n unexpanded inside double quotes, so python -c needs ';'
+RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" || true
+
+# Install Playwright browsers
+RUN pip install playwright && playwright install chromium
+
+# Copy backend source
+COPY surfsense_backend/ ./
+
+# ====================
+# Configuration
+# ====================
+WORKDIR /app
+
+# Copy supervisor configuration
+COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf
+
+# Copy entrypoint script
+COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+# PostgreSQL initialization script
+COPY scripts/docker/init-postgres.sh /app/init-postgres.sh +RUN chmod +x /app/init-postgres.sh + +# Environment variables with defaults +ENV POSTGRES_USER=surfsense +ENV POSTGRES_PASSWORD=surfsense +ENV POSTGRES_DB=surfsense +ENV DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense +ENV CELERY_BROKER_URL=redis://localhost:6379/0 +ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0 +ENV PYTHONPATH=/app/backend +ENV NEXT_FRONTEND_URL=http://localhost:3000 +ENV AUTH_TYPE=LOCAL +ENV ETL_SERVICE=DOCLING +ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 + +# Data volume +VOLUME ["/data"] + +# Expose ports +# 3000 - Frontend +# 8000 - Backend API +EXPOSE 3000 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1 + +# Run entrypoint +CMD ["/app/entrypoint.sh"] + diff --git a/README.md b/README.md index f97a5813f..b717cacbe 100644 --- a/README.md +++ b/README.md @@ -150,32 +150,84 @@ Check out our public roadmap and contribute your ideas or feedback: ## How to get started? +### Quick Start with Docker 🐳 + +> [!TIP] +> For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability. 
+ +**Quick Start:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**With Custom Embedding Model (e.g., OpenAI):** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Using Docker Compose (Recommended for easier management):** + +```bash +# Download the quick start compose file +curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml + +# Create .env file with your secret key +echo "SECRET_KEY=$(openssl rand -hex 32)" > .env + +# Start SurfSense +docker compose up -d +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: [http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + ### Installation Options -SurfSense provides three options to get started: +SurfSense provides multiple options to get started: 1. **[SurfSense Cloud](https://www.surfsense.com/login)** - The easiest way to try SurfSense without any setup. - No installation required - Instant access to all features - Perfect for getting started quickly -2. **[Docker Installation (Recommended for Self-Hosting)](https://www.surfsense.net/docs/docker-installation)** - Easy way to get SurfSense up and running with all dependencies containerized. +2. **Quick Start Docker (Above)** - Single command to get SurfSense running locally. 
+ - All-in-one image with PostgreSQL, Redis, and all services bundled + - Perfect for evaluation, development, and small deployments + - Data persisted via Docker volume + +3. **[Docker Compose (Production)](https://www.surfsense.net/docs/docker-installation)** - Full stack deployment with separate services. - Includes pgAdmin for database management through a web UI - Supports environment variable customization via `.env` file - Flexible deployment options (full stack or core services only) - - No need to manually edit configuration files between environments + - Better for production with separate scaling of services -3. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. +4. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. Docker and manual installation guides include detailed OS-specific instructions for Windows, macOS, and Linux. 
Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.net/docs/) including: -- Auth setup -- **File Processing ETL Service** (choose one): +- Auth setup (optional - defaults to LOCAL auth) +- **File Processing ETL Service** (optional - defaults to Docling): + - Docling (default, local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) - Unstructured.io API key (supports 34+ formats) - LlamaIndex API key (enhanced parsing, supports 50+ formats) - - Docling (local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) -- Other required API keys +- Other API keys as needed for your use case ## Screenshots diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml new file mode 100644 index 000000000..012388335 --- /dev/null +++ b/docker-compose.quickstart.yml @@ -0,0 +1,82 @@ +# SurfSense Quick Start Docker Compose +# +# This is a simplified docker-compose for quick local deployment using pre-built images. +# For production or customized deployments, use the main docker-compose.yml +# +# Usage: +# 1. Create a .env file with your required configuration (see below) +# 2. Run: docker compose -f docker-compose.quickstart.yml up -d +# 3. 
Access SurfSense at http://localhost:3000 +# +# Required Environment Variables: +# - SECRET_KEY: JWT secret key (generate with: openssl rand -hex 32) +# +# Optional Environment Variables: +# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2) +# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING) +# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro) +# - STT_SERVICE: Speech-to-text service (default: local/base) +# - FIRECRAWL_API_KEY: For web crawling features + +version: "3.8" + +services: + # All-in-one SurfSense container + surfsense: + image: ghcr.io/modsetter/surfsense:latest + container_name: surfsense + ports: + - "${FRONTEND_PORT:-3000}:3000" + - "${BACKEND_PORT:-8000}:8000" + volumes: + - surfsense-data:/data + environment: + # Required + - SECRET_KEY=${SECRET_KEY:-change-me-in-production} + + # Auth Configuration + - AUTH_TYPE=${AUTH_TYPE:-LOCAL} + - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} + - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} + + # AI/ML Configuration + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE} + - RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-} + - RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-} + + # Document Processing + - ETL_SERVICE=${ETL_SERVICE:-DOCLING} + - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} + - LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-} + + # Audio Services + - TTS_SERVICE=${TTS_SERVICE:-local/kokoro} + - TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-} + - STT_SERVICE=${STT_SERVICE:-local/base} + - STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-} + + # Web Crawling + - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-} + + # Optional Features + - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE} + - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m} + + # LangSmith Observability (optional) + - 
LANGSMITH_TRACING=${LANGSMITH_TRACING:-false}
+      - LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-}
+      - LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-}
+      - LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1"]  # CMD-SHELL: '&&' needs a shell; exec-form CMD would pass it to curl as an argument
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 120s
+
+volumes:
+  surfsense-data:
+    name: surfsense-data
+
diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh
new file mode 100644
index 000000000..0df2555c2
--- /dev/null
+++ b/scripts/docker/entrypoint-allinone.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+set -e
+
+echo "==========================================="
+echo " 🏄 SurfSense All-in-One Container"
+echo "==========================================="
+
+# Create log directory
+mkdir -p /var/log/supervisor
+
+# ================================================
+# Initialize PostgreSQL if needed
+# ================================================
+if [ ! -f /data/postgres/PG_VERSION ]; then
+    echo "📦 Initializing PostgreSQL database..."
+
+    # Initialize PostgreSQL data directory.
+    # mkdir -p first: /data may be an empty bind mount, and chown on a missing
+    # path would abort the script under `set -e`.
+    mkdir -p /data/postgres
+    chown -R postgres:postgres /data/postgres
+    chmod 700 /data/postgres
+
+    su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres"
+
+    # Configure PostgreSQL for connections
+    echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf
+    echo "local all all trust" >> /data/postgres/pg_hba.conf
+    echo "listen_addresses='*'" >> /data/postgres/postgresql.conf
+
+    # Start PostgreSQL temporarily to create database and user
+    su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_init.log start"
+
+    # Wait for PostgreSQL to be ready
+    sleep 5
+
+    # Create user and database
+    su - postgres -c "psql -c \"CREATE USER ${POSTGRES_USER:-surfsense} WITH PASSWORD '${POSTGRES_PASSWORD:-surfsense}' SUPERUSER;\""
+    su - postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB:-surfsense} OWNER ${POSTGRES_USER:-surfsense};\""
+
+    # Enable pgvector extension
+    su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} -c 'CREATE EXTENSION IF NOT EXISTS vector;'"
+
+    # Stop temporary PostgreSQL
+    su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
+
+    echo "✅ PostgreSQL initialized successfully"
+else
+    echo "✅ PostgreSQL data directory already exists"
+fi
+
+# ================================================
+# Initialize Redis data directory
+# ================================================
+mkdir -p /data/redis
+chmod 755 /data/redis
+echo "✅ Redis data directory ready"
+
+# ================================================
+# Copy frontend build to runtime location
+# ================================================
+if [ -d /app/frontend/.next/standalone ]; then
+    # Put the standalone output (server.js etc.) into /app/frontend, the
+    # directory supervisor runs `node server.js` from.
+    cp -r /app/frontend/.next/standalone/* /app/frontend/ 2>/dev/null || true
+    # /app/frontend/.next/static already sits at its runtime path, so it needs
+    # no copy (copying it onto itself only produced a suppressed cp error).
+fi
+
+# ================================================
+# Run database migrations
+# ================================================
+# Starts PostgreSQL and Redis temporarily, runs `alembic upgrade head`, then
+# stops both again. Sets MIGRATIONS_OK to "true" only if alembic exited 0.
+# A failure is reported but does not abort the container (best effort).
+run_migrations() {
+    echo "🔄 Running database migrations..."
+    MIGRATIONS_OK=false
+
+    # Start PostgreSQL temporarily for migrations
+    su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_migrate.log start"
+    sleep 5
+
+    # Start Redis temporarily for migrations (some might need it)
+    redis-server --dir /data/redis --daemonize yes
+    sleep 2
+
+    # Run alembic migrations. `alembic upgrade head` exits 0 when the schema is
+    # already current, so a non-zero exit is a real failure.
+    cd /app/backend
+    if alembic upgrade head; then
+        MIGRATIONS_OK=true
+    fi
+
+    # Stop temporary services
+    redis-cli shutdown || true
+    su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
+
+    if [ "$MIGRATIONS_OK" = "true" ]; then
+        echo "✅ Database migrations complete"
+    else
+        echo "⚠️ Database migrations failed; they will be retried on the next start"
+    fi
+}
+
+# Run migrations on first start or when explicitly requested
+if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then
+    run_migrations
+    # Record the marker only on success so a failed run is retried, not skipped forever
+    if [ "$MIGRATIONS_OK" = "true" ]; then
+        touch /data/.migrations_run
+    fi
+fi
+
+# ================================================
+# Environment Variables Info
+# ================================================
+echo ""
+echo "==========================================="
+echo " 📋 Configuration"
+echo "==========================================="
+echo " Frontend URL: http://localhost:3000"
+echo " Backend API: http://localhost:8000"
+echo " API Docs: http://localhost:8000/docs"
+echo " Auth Type: ${AUTH_TYPE:-LOCAL}"
+echo " ETL Service: ${ETL_SERVICE:-DOCLING}"
+echo "==========================================="
+echo ""
+
+# ================================================
+# Start Supervisor (manages all services)
+# ================================================
+echo "🚀 Starting all services..."
+exec /usr/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf + diff --git a/scripts/docker/init-postgres.sh b/scripts/docker/init-postgres.sh new file mode 100644 index 000000000..3d2a15f46 --- /dev/null +++ b/scripts/docker/init-postgres.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# PostgreSQL initialization script for SurfSense +# This script is called during container startup if the database needs initialization + +set -e + +PGDATA=${PGDATA:-/data/postgres} +POSTGRES_USER=${POSTGRES_USER:-surfsense} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-surfsense} +POSTGRES_DB=${POSTGRES_DB:-surfsense} + +echo "Initializing PostgreSQL..." + +# Check if PostgreSQL is already initialized +if [ -f "$PGDATA/PG_VERSION" ]; then + echo "PostgreSQL data directory already exists. Skipping initialization." + exit 0 +fi + +# Initialize the database cluster +/usr/lib/postgresql/14/bin/initdb -D "$PGDATA" --username=postgres + +# Configure PostgreSQL +cat >> "$PGDATA/postgresql.conf" << EOF +listen_addresses = '*' +max_connections = 100 +shared_buffers = 128MB +EOF + +cat >> "$PGDATA/pg_hba.conf" << EOF +# Allow connections from anywhere with password +host all all 0.0.0.0/0 md5 +host all all ::0/0 md5 +EOF + +# Start PostgreSQL temporarily +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" -l /tmp/postgres_init.log start + +# Wait for PostgreSQL to start +sleep 3 + +# Create user and database +psql -U postgres << EOF +CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER; +CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER; +\c $POSTGRES_DB +CREATE EXTENSION IF NOT EXISTS vector; +EOF + +echo "PostgreSQL initialized successfully." 
+ +# Stop PostgreSQL (supervisor will start it) +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" stop + diff --git a/scripts/docker/supervisor-allinone.conf b/scripts/docker/supervisor-allinone.conf new file mode 100644 index 000000000..15685592a --- /dev/null +++ b/scripts/docker/supervisor-allinone.conf @@ -0,0 +1,94 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisor/supervisord.log +pidfile=/var/run/supervisord.pid +childlogdir=/var/log/supervisor +user=root + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +# PostgreSQL +[program:postgresql] +command=/usr/lib/postgresql/14/bin/postgres -D /data/postgres +user=postgres +autostart=true +autorestart=true +priority=10 +stdout_logfile=/var/log/supervisor/postgresql.log +stderr_logfile=/var/log/supervisor/postgresql-error.log +environment=PGDATA="/data/postgres" + +# Redis +[program:redis] +command=/usr/bin/redis-server --dir /data/redis --appendonly yes +autostart=true +autorestart=true +priority=20 +stdout_logfile=/var/log/supervisor/redis.log +stderr_logfile=/var/log/supervisor/redis-error.log + +# Backend API +[program:backend] +command=python main.py +directory=/app/backend +autostart=true +autorestart=true +priority=30 +startsecs=10 +startretries=3 +stdout_logfile=/var/log/supervisor/backend.log +stderr_logfile=/var/log/supervisor/backend-error.log +environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1" + +# Celery Worker +[program:celery-worker] +command=celery -A app.celery_app worker --loglevel=info --concurrency=2 --pool=solo +directory=/app/backend +autostart=true +autorestart=true +priority=40 +startsecs=15 +startretries=3 +stdout_logfile=/var/log/supervisor/celery-worker.log +stderr_logfile=/var/log/supervisor/celery-worker-error.log 
+environment=PYTHONPATH="/app/backend" + +# Celery Beat (scheduler) +[program:celery-beat] +command=celery -A app.celery_app beat --loglevel=info +directory=/app/backend +autostart=true +autorestart=true +priority=50 +startsecs=20 +startretries=3 +stdout_logfile=/var/log/supervisor/celery-beat.log +stderr_logfile=/var/log/supervisor/celery-beat-error.log +environment=PYTHONPATH="/app/backend" + +# Frontend +[program:frontend] +command=node server.js +directory=/app/frontend +autostart=true +autorestart=true +priority=60 +startsecs=5 +startretries=3 +stdout_logfile=/var/log/supervisor/frontend.log +stderr_logfile=/var/log/supervisor/frontend-error.log +environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0" + +# Process Groups +[group:surfsense] +programs=postgresql,redis,backend,celery-worker,celery-beat,frontend +priority=999 + diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index 46ef4128b..e4ae03e92 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -8,7 +8,135 @@ full: true # Docker Installation -This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment. +This guide explains how to run SurfSense using Docker, with options ranging from quick single-command deployment to full production setups. + +## Quick Start with Docker 🐳 + +Get SurfSense running in seconds with a single command: + + +The all-in-one Docker image bundles PostgreSQL (with pgvector), Redis, and all SurfSense services. Perfect for quick evaluation and development. + + + +Make sure to include the `-v surfsense-data:/data` in your Docker command. This ensures your database and files are properly persisted. 
+ + +### One-Line Installation + +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +$secretKey = -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | ForEach-Object {[char]$_}) +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + -e SECRET_KEY=$secretKey ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +### With Custom Configuration + +**Using OpenAI Embeddings:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**With Google OAuth:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e SECRET_KEY=$(openssl rand -hex 32) \ + -e AUTH_TYPE=GOOGLE \ + -e GOOGLE_OAUTH_CLIENT_ID=your_client_id \ + -e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +### Quick Start with Docker Compose + +For easier management with environment files: + +```bash +# Download the quick start compose file +curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml + +# Create .env file +cat > .env << EOF +SECRET_KEY=$(openssl rand -hex 32) +# Add other configuration as needed +# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +# ETL_SERVICE=DOCLING +EOF + +# Start SurfSense +docker compose up -d +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: 
[http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + +### Quick Start Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| SECRET_KEY | JWT secret key (required) | - | +| AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL | +| EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 | +| ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING | +| TTS_SERVICE | Text-to-speech for podcasts | local/kokoro | +| STT_SERVICE | Speech-to-text for audio | local/base | +| REGISTRATION_ENABLED | Allow new user registration | TRUE | + +### Useful Commands + +```bash +# View logs +docker logs -f surfsense + +# Stop SurfSense +docker stop surfsense + +# Start SurfSense +docker start surfsense + +# Remove container (data preserved in volume) +docker rm surfsense + +# Remove container AND data +docker rm surfsense && docker volume rm surfsense-data +``` + +--- + +## Full Docker Compose Setup (Production) + +For production deployments with separate services and more control, use the full Docker Compose setup below. 
## Prerequisites From 32dd3732e3d7d078674e3e41a0ce93bbeb863fe5 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 21:07:05 -0800 Subject: [PATCH 02/10] fix: exclude only backend lib folders in dockerignore --- .dockerignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index 207ef0d4b..ad6805174 100644 --- a/.dockerignore +++ b/.dockerignore @@ -37,8 +37,9 @@ LICENSE **/downloads/ **/eggs/ **/.eggs/ -**/lib/ -**/lib64/ +# Python venv lib folders (but not frontend lib folders) +surfsense_backend/lib/ +surfsense_backend/lib64/ **/parts/ **/sdist/ **/var/ From 0b4efbbe9f0e58ae24e89b545b7f54525123ae4a Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 21:44:27 -0800 Subject: [PATCH 03/10] refactor: update Dockerfile to install Python 3.12 --- Dockerfile.allinone | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 2fe62a86b..7061ee4b0 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -7,7 +7,7 @@ FROM ubuntu:22.04 AS base # Prevent interactive prompts during package installation ENV DEBIAN_FRONTEND=noninteractive -# Install system dependencies +# Install system dependencies (first batch without Python) RUN apt-get update && apt-get install -y --no-install-recommends \ # PostgreSQL dependencies postgresql-14 \ @@ -18,12 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ # Redis redis-server \ - # Python - python3.11 \ - python3.11-venv \ - python3.11-dev \ - python3-pip \ - # Node.js + # Node.js prerequisites curl \ ca-certificates \ gnupg \ @@ -41,6 +36,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libxext6 \ libxrender1 \ dos2unix \ + # For adding PPAs + software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +# Install Python 3.12 from deadsnakes PPA (required by backend) +RUN 
add-apt-repository ppa:deadsnakes/ppa -y \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3.12-dev \ + python3-pip \ && rm -rf /var/lib/apt/lists/* # Install Node.js 20.x @@ -57,9 +64,12 @@ RUN cd /tmp \ && make install \ && rm -rf /tmp/pgvector -# Set Python 3.11 as default -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 +# Set Python 3.12 as default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 + +# Ensure pip uses Python 3.12 +RUN python3.12 -m pip install --upgrade pip # Update certificates and install SSL tools RUN update-ca-certificates From 50409e5fc43aa66cfa535443842d331d529d5951 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 21:50:20 -0800 Subject: [PATCH 04/10] fix: update Dockerfile to install pip for Python 3.12 using ensurepip --- Dockerfile.allinone | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 7061ee4b0..d4e6c02e6 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -68,8 +68,9 @@ RUN cd /tmp \ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 -# Ensure pip uses Python 3.12 -RUN python3.12 -m pip install --upgrade pip +# Install pip for Python 3.12 using ensurepip (distutils removed in 3.12) +RUN python3.12 -m ensurepip --upgrade \ + && python3.12 -m pip install --upgrade pip # Update certificates and install SSL tools RUN update-ca-certificates From 216a9188a93dce4a5ccb680297e6f7f105c04202 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 23:13:42 -0800 Subject: [PATCH 05/10] fix(try): reduce image size 
--- .github/workflows/docker_build.yaml | 10 +- Dockerfile.allinone | 161 ++++++++++++++++------------ 2 files changed, 104 insertions(+), 67 deletions(-) diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker_build.yaml index 2f4d39808..e8916b47a 100644 --- a/.github/workflows/docker_build.yaml +++ b/.github/workflows/docker_build.yaml @@ -138,6 +138,14 @@ jobs: type=raw,value=${{ needs.tag_release.outputs.new_tag }} type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} + - name: Free up disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker system prune -af + - name: Build and push SurfSense image uses: docker/build-push-action@v5 with: @@ -146,6 +154,6 @@ jobs: push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - platforms: linux/amd64,linux/arm64 + platforms: linux/amd64 cache-from: type=gha cache-to: type=gha,mode=max diff --git a/Dockerfile.allinone b/Dockerfile.allinone index d4e6c02e6..8de82fd3b 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -2,14 +2,50 @@ # This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend # Usage: docker run -d -p 3000:3000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest -FROM ubuntu:22.04 AS base +# ==================== +# Stage 1: Build Frontend +# ==================== +FROM node:20-alpine AS frontend-builder -# Prevent interactive prompts during package installation +WORKDIR /app + +# Install pnpm +RUN corepack enable pnpm + +# Copy package files +COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ +COPY surfsense_web/source.config.ts ./ +COPY surfsense_web/content ./content + +# Install dependencies +RUN pnpm install --frozen-lockfile + +# Copy source +COPY 
surfsense_web/ ./ + +# Build args for frontend +ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 +ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL +ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING + +ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL +ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE +ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE + +# Build +RUN pnpm run build + +# ==================== +# Stage 2: Runtime Image +# ==================== +FROM ubuntu:22.04 AS runtime + +# Prevent interactive prompts ENV DEBIAN_FRONTEND=noninteractive -# Install system dependencies (first batch without Python) +# Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - # PostgreSQL dependencies + # PostgreSQL postgresql-14 \ postgresql-contrib-14 \ # Build tools for pgvector @@ -22,9 +58,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ ca-certificates \ gnupg \ - # Supervisor for process management + # Supervisor supervisor \ - # Additional dependencies for backend + # Backend dependencies gcc \ wget \ unzip \ @@ -36,25 +72,45 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libxext6 \ libxrender1 \ dos2unix \ - # For adding PPAs + # For PPAs software-properties-common \ + # Playwright dependencies + libnspr4 \ + libnss3 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libxkbcommon0 \ + libatspi2.0-0 \ + libxcomposite1 \ + libxdamage1 \ + libxrandr2 \ + libgbm1 \ + libcairo2 \ + libpango-1.0-0 \ && rm -rf /var/lib/apt/lists/* -# Install Python 3.12 from deadsnakes PPA (required by backend) +# Install Node.js 20.x (for running frontend) +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs \ + && rm -rf /var/lib/apt/lists/* + +# Install Python 3.12 from deadsnakes PPA RUN add-apt-repository ppa:deadsnakes/ppa -y \ && apt-get update \ && apt-get install -y --no-install-recommends \ 
python3.12 \ python3.12-venv \ python3.12-dev \ - python3-pip \ && rm -rf /var/lib/apt/lists/* -# Install Node.js 20.x -RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ - && apt-get install -y nodejs \ - && npm install -g pnpm \ - && rm -rf /var/lib/apt/lists/* +# Set Python 3.12 as default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 + +# Install pip for Python 3.12 +RUN python3.12 -m ensurepip --upgrade \ + && python3.12 -m pip install --upgrade pip # Build and install pgvector RUN cd /tmp \ @@ -64,15 +120,7 @@ RUN cd /tmp \ && make install \ && rm -rf /tmp/pgvector -# Set Python 3.12 as default -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 - -# Install pip for Python 3.12 using ensurepip (distutils removed in 3.12) -RUN python3.12 -m ensurepip --upgrade \ - && python3.12 -m pip install --upgrade pip - -# Update certificates and install SSL tools +# Update certificates RUN update-ca-certificates # Create data directories @@ -80,46 +128,25 @@ RUN mkdir -p /data/postgres /data/redis /data/surfsense \ && chown -R postgres:postgres /data/postgres # ==================== -# Build Frontend +# Copy Frontend Build # ==================== WORKDIR /app/frontend -# Copy frontend source -COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ -COPY surfsense_web/source.config.ts ./ -COPY surfsense_web/content ./content - -# Install frontend dependencies -RUN pnpm install --frozen-lockfile - -# Copy rest of frontend -COPY surfsense_web/ ./ - -# Build frontend with default values (can be overridden at runtime via reverse proxy) -ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 -ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL -ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING - -ENV 
NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL -ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE -ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE - -RUN pnpm run build +# Copy only the standalone build (not node_modules) +COPY --from=frontend-builder /app/.next/standalone ./ +COPY --from=frontend-builder /app/.next/static ./.next/static +COPY --from=frontend-builder /app/public ./public # ==================== # Setup Backend # ==================== WORKDIR /app/backend -# Copy backend source +# Copy backend dependency files COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./ -# Install PyTorch based on architecture -RUN if [ "$(uname -m)" = "x86_64" ]; then \ - pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; \ - else \ - pip install --no-cache-dir torch torchvision torchaudio; \ - fi +# Install PyTorch (CPU only to save space) +RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu # Install python dependencies RUN pip install --no-cache-dir certifi pip-system-certs uv \ @@ -132,15 +159,15 @@ RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \ # Pre-download EasyOCR models RUN mkdir -p /root/.EasyOCR/model \ - && wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true \ - && wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \ - && cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true) - -# Pre-download Docling models -RUN python -c "try:\n from docling.document_converter import DocumentConverter\n conv = DocumentConverter()\nexcept:\n pass" || true + && wget --no-check-certificate -q 
https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true \ + && wget --no-check-certificate -q https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \ + && cd /root/.EasyOCR/model && (unzip -o -q english_g2.zip || true) && (unzip -o -q craft_mlt_25k.zip || true) \ + && rm -f /root/.EasyOCR/model/*.zip # Install Playwright browsers -RUN pip install playwright && playwright install chromium +RUN pip install --no-cache-dir playwright \ + && playwright install chromium \ + && rm -rf /root/.cache/ms-playwright/ffmpeg* # Copy backend source COPY surfsense_backend/ ./ @@ -155,11 +182,16 @@ COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.co # Copy entrypoint script COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh -RUN chmod +x /app/entrypoint.sh +RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh # PostgreSQL initialization script COPY scripts/docker/init-postgres.sh /app/init-postgres.sh -RUN chmod +x /app/init-postgres.sh +RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh + +# Clean up build dependencies to reduce image size +RUN apt-get purge -y build-essential postgresql-server-dev-14 git \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # Environment variables with defaults ENV POSTGRES_USER=surfsense @@ -178,14 +210,11 @@ ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 VOLUME ["/data"] # Expose ports -# 3000 - Frontend -# 8000 - Backend API EXPOSE 3000 8000 # Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ - CMD curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1 +HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ + CMD curl -f http://localhost:3000 || exit 1 # Run entrypoint CMD ["/app/entrypoint.sh"] - From 
6b07fcb13155aec0c75937cc621f2c370a0d7d07 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Dec 2025 23:19:50 -0800 Subject: [PATCH 06/10] chore: update Dockerfile --- Dockerfile.allinone | 49 +++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 8de82fd3b..c7a2505f6 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -1,6 +1,17 @@ # SurfSense All-in-One Docker Image # This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend -# Usage: docker run -d -p 3000:3000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest +# Usage: docker run -d -p 3000:3000 -p 8000:8000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest +# +# Included Services (all run locally by default): +# - PostgreSQL 14 + pgvector (vector database) +# - Redis (task queue) +# - Docling (document processing, CPU-only, OCR disabled) +# - Kokoro TTS (local text-to-speech for podcasts) +# - Faster-Whisper (local speech-to-text for audio files) +# - Playwright Chromium (web scraping) +# +# Note: This is the CPU-only version. A :cuda tagged image with GPU support +# will be available in the future for faster AI inference. 
# ==================== # Stage 1: Build Frontend @@ -64,17 +75,33 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ wget \ unzip \ + dos2unix \ + # For PPAs + software-properties-common \ + # ============================ + # Local TTS (Kokoro) dependencies + # ============================ espeak-ng \ + libespeak-ng1 \ + # ============================ + # Local STT (Faster-Whisper) dependencies + # ============================ + ffmpeg \ + # ============================ + # Audio processing (soundfile) + # ============================ libsndfile1 \ + # ============================ + # Image/OpenCV dependencies (for Docling) + # ============================ libgl1 \ libglib2.0-0 \ libsm6 \ libxext6 \ libxrender1 \ - dos2unix \ - # For PPAs - software-properties-common \ - # Playwright dependencies + # ============================ + # Playwright browser dependencies + # ============================ libnspr4 \ libnss3 \ libatk1.0-0 \ @@ -145,8 +172,8 @@ WORKDIR /app/backend # Copy backend dependency files COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./ -# Install PyTorch (CPU only to save space) -RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed) +RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu # Install python dependencies RUN pip install --no-cache-dir certifi pip-system-certs uv \ @@ -157,12 +184,8 @@ RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \ && echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \ && echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh -# Pre-download EasyOCR models -RUN mkdir -p /root/.EasyOCR/model \ - && wget --no-check-certificate -q https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || 
true \ - && wget --no-check-certificate -q https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \ - && cd /root/.EasyOCR/model && (unzip -o -q english_g2.zip || true) && (unzip -o -q craft_mlt_25k.zip || true) \ - && rm -f /root/.EasyOCR/model/*.zip +# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py +# GPU support will be added in a future :cuda tagged image # Install Playwright browsers RUN pip install --no-cache-dir playwright \ From b8478f2ec0606cf34d79e07aaf138d1ff1009391 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 9 Dec 2025 00:53:55 -0800 Subject: [PATCH 07/10] chore: improved configuration management and logging --- Dockerfile.allinone | 5 ++- README.md | 38 +++++++++-------- docker-compose.quickstart.yml | 14 +++---- scripts/docker/entrypoint-allinone.sh | 41 ++++++++++++++++++- scripts/docker/supervisor-allinone.conf | 41 ++++++++++++------- .../content/docs/docker-installation.mdx | 16 +++----- 6 files changed, 102 insertions(+), 53 deletions(-) diff --git a/Dockerfile.allinone b/Dockerfile.allinone index c7a2505f6..0765deb15 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -69,8 +69,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ ca-certificates \ gnupg \ - # Supervisor - supervisor \ # Backend dependencies gcc \ wget \ @@ -139,6 +137,9 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ RUN python3.12 -m ensurepip --upgrade \ && python3.12 -m pip install --upgrade pip +# Install supervisor via pip (system package incompatible with Python 3.12) +RUN pip install --no-cache-dir supervisor + # Build and install pgvector RUN cd /tmp \ && git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \ diff --git a/README.md b/README.md index b717cacbe..249ca0f0b 100644 --- a/README.md +++ b/README.md @@ -155,23 +155,31 @@ Check out our public roadmap 
and contribute your ideas or feedback: > [!TIP] > For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability. -**Quick Start :** +**Linux/macOS:** ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ --name surfsense \ --restart unless-stopped \ ghcr.io/modsetter/surfsense:latest ``` -**With Custom Embedding Model (e.g., OpenAI):** +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +**With Custom Configuration (e.g., OpenAI Embeddings):** ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ -e OPENAI_API_KEY=your_openai_api_key \ --name surfsense \ @@ -179,24 +187,20 @@ docker run -d -p 3000:3000 -p 8000:8000 \ ghcr.io/modsetter/surfsense:latest ``` -**Using Docker Compose (Recommended for easier management):** - -```bash -# Download the quick start compose file -curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml - -# Create .env file with your secret key -echo "SECRET_KEY=$(openssl rand -hex 32)" > .env - -# Start SurfSense -docker compose up -d -``` - After starting, access SurfSense at: - **Frontend**: [http://localhost:3000](http://localhost:3000) - **Backend API**: [http://localhost:8000](http://localhost:8000) - **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) +**Useful Commands:** + +```bash +docker logs -f surfsense # View logs +docker stop surfsense # Stop +docker start surfsense # Start +docker rm surfsense # Remove (data preserved in volume) +``` + ### Installation Options SurfSense provides multiple options to get started: diff --git 
a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml index 012388335..ff72618b7 100644 --- a/docker-compose.quickstart.yml +++ b/docker-compose.quickstart.yml @@ -4,18 +4,16 @@ # For production or customized deployments, use the main docker-compose.yml # # Usage: -# 1. Create a .env file with your required configuration (see below) +# 1. (Optional) Create a .env file with your configuration # 2. Run: docker compose -f docker-compose.quickstart.yml up -d # 3. Access SurfSense at http://localhost:3000 # -# Required Environment Variables: -# - SECRET_KEY: JWT secret key (generate with: openssl rand -hex 32) -# -# Optional Environment Variables: +# All Environment Variables are Optional: +# - SECRET_KEY: JWT secret key (auto-generated and persisted if not set) # - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2) # - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING) # - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro) -# - STT_SERVICE: Speech-to-text service (default: local/base) +# - STT_SERVICE: Speech-to-text service with model size (default: local/base) # - FIRECRAWL_API_KEY: For web crawling features version: "3.8" @@ -31,8 +29,8 @@ services: volumes: - surfsense-data:/data environment: - # Required - - SECRET_KEY=${SECRET_KEY:-change-me-in-production} + # Authentication (auto-generated if not set) + - SECRET_KEY=${SECRET_KEY:-} # Auth Configuration - AUTH_TYPE=${AUTH_TYPE:-LOCAL} diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index 0df2555c2..427256f6d 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -8,6 +8,40 @@ echo "===========================================" # Create log directory mkdir -p /var/log/supervisor +# ================================================ +# Ensure data directory exists +# ================================================ 
+mkdir -p /data + +# ================================================ +# Generate SECRET_KEY if not provided +# ================================================ +if [ -z "$SECRET_KEY" ]; then + # Generate a random secret key and persist it + if [ -f /data/.secret_key ]; then + export SECRET_KEY=$(cat /data/.secret_key) + echo "✅ Using existing SECRET_KEY from persistent storage" + else + export SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_urlsafe(32))") + echo "$SECRET_KEY" > /data/.secret_key + chmod 600 /data/.secret_key + echo "✅ Generated new SECRET_KEY (saved for persistence)" + fi +fi + +# ================================================ +# Set default TTS/STT services if not provided +# ================================================ +if [ -z "$TTS_SERVICE" ]; then + export TTS_SERVICE="local/kokoro" + echo "✅ Using default TTS_SERVICE: local/kokoro" +fi + +if [ -z "$STT_SERVICE" ]; then + export STT_SERVICE="local/base" + echo "✅ Using default STT_SERVICE: local/base" +fi + # ================================================ # Initialize PostgreSQL if needed # ================================================ @@ -18,7 +52,8 @@ if [ ! 
-f /data/postgres/PG_VERSION ]; then chown -R postgres:postgres /data/postgres chmod 700 /data/postgres - su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres" + # Initialize with UTF8 encoding (required for proper text handling) + su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres --encoding=UTF8 --locale=C.UTF-8" # Configure PostgreSQL for connections echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf @@ -104,6 +139,8 @@ echo " Backend API: http://localhost:8000" echo " API Docs: http://localhost:8000/docs" echo " Auth Type: ${AUTH_TYPE:-LOCAL}" echo " ETL Service: ${ETL_SERVICE:-DOCLING}" +echo " TTS Service: ${TTS_SERVICE}" +echo " STT Service: ${STT_SERVICE}" echo "===========================================" echo "" @@ -111,5 +148,5 @@ echo "" # Start Supervisor (manages all services) # ================================================ echo "🚀 Starting all services..." -exec /usr/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf +exec /usr/local/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf diff --git a/scripts/docker/supervisor-allinone.conf b/scripts/docker/supervisor-allinone.conf index 15685592a..6cada0dc2 100644 --- a/scripts/docker/supervisor-allinone.conf +++ b/scripts/docker/supervisor-allinone.conf @@ -1,8 +1,9 @@ [supervisord] nodaemon=true -logfile=/var/log/supervisor/supervisord.log +logfile=/dev/stdout +logfile_maxbytes=0 pidfile=/var/run/supervisord.pid -childlogdir=/var/log/supervisor +loglevel=info user=root [unix_http_server] @@ -22,8 +23,10 @@ user=postgres autostart=true autorestart=true priority=10 -stdout_logfile=/var/log/supervisor/postgresql.log -stderr_logfile=/var/log/supervisor/postgresql-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PGDATA="/data/postgres" # Redis @@ -32,8 +35,10 @@ command=/usr/bin/redis-server --dir /data/redis --appendonly yes autostart=true autorestart=true 
priority=20 -stdout_logfile=/var/log/supervisor/redis.log -stderr_logfile=/var/log/supervisor/redis-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 # Backend API [program:backend] @@ -44,8 +49,10 @@ autorestart=true priority=30 startsecs=10 startretries=3 -stdout_logfile=/var/log/supervisor/backend.log -stderr_logfile=/var/log/supervisor/backend-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1" # Celery Worker @@ -57,8 +64,10 @@ autorestart=true priority=40 startsecs=15 startretries=3 -stdout_logfile=/var/log/supervisor/celery-worker.log -stderr_logfile=/var/log/supervisor/celery-worker-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend" # Celery Beat (scheduler) @@ -70,8 +79,10 @@ autorestart=true priority=50 startsecs=20 startretries=3 -stdout_logfile=/var/log/supervisor/celery-beat.log -stderr_logfile=/var/log/supervisor/celery-beat-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend" # Frontend @@ -83,8 +94,10 @@ autorestart=true priority=60 startsecs=5 startretries=3 -stdout_logfile=/var/log/supervisor/frontend.log -stderr_logfile=/var/log/supervisor/frontend-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0" # Process Groups diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index e4ae03e92..32532725b 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ 
-29,7 +29,6 @@ Make sure to include the `-v surfsense-data:/data` in your Docker command. This ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ --name surfsense \ --restart unless-stopped \ ghcr.io/modsetter/surfsense:latest @@ -38,15 +37,15 @@ docker run -d -p 3000:3000 -p 8000:8000 \ **Windows (PowerShell):** ```powershell -$secretKey = -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | ForEach-Object {[char]$_}) docker run -d -p 3000:3000 -p 8000:8000 ` -v surfsense-data:/data ` - -e SECRET_KEY=$secretKey ` --name surfsense ` --restart unless-stopped ` ghcr.io/modsetter/surfsense:latest ``` +> **Note:** A secure `SECRET_KEY` is automatically generated and persisted in the data volume on first run. + ### With Custom Configuration **Using OpenAI Embeddings:** @@ -54,7 +53,6 @@ docker run -d -p 3000:3000 -p 8000:8000 ` ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ -e OPENAI_API_KEY=your_openai_api_key \ --name surfsense \ @@ -67,7 +65,6 @@ docker run -d -p 3000:3000 -p 8000:8000 \ ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e AUTH_TYPE=GOOGLE \ -e GOOGLE_OAUTH_CLIENT_ID=your_client_id \ -e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \ @@ -84,12 +81,11 @@ For easier management with environment files: # Download the quick start compose file curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml -# Create .env file +# Create .env file (optional - for custom configuration) cat > .env << EOF -SECRET_KEY=$(openssl rand -hex 32) -# Add other configuration as needed # EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 # ETL_SERVICE=DOCLING +# SECRET_KEY=your_custom_secret_key # Auto-generated if not set EOF # Start SurfSense @@ 
-105,12 +101,12 @@ After starting, access SurfSense at: | Variable | Description | Default | |----------|-------------|---------| -| SECRET_KEY | JWT secret key (required) | - | +| SECRET_KEY | JWT secret key (auto-generated if not set) | Auto-generated | | AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL | | EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 | | ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING | | TTS_SERVICE | Text-to-speech for podcasts | local/kokoro | -| STT_SERVICE | Speech-to-text for audio | local/base | +| STT_SERVICE | Speech-to-text for audio (model size: tiny, base, small, medium, large) | local/base | | REGISTRATION_ENABLED | Allow new user registration | TRUE | ### Useful Commands From 50edeadcaac934866aa182e30366ad4b4b6b6022 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 9 Dec 2025 01:27:13 -0800 Subject: [PATCH 08/10] chore: update links in documentation and metadata to use the correct domain --- CONTRIBUTING.md | 6 +- README.md | 8 +- README.zh-CN.md | 74 ++++++++++++++++--- .../routes/pages/ApiKeyForm.tsx | 2 +- surfsense_web/app/layout.tsx | 10 +-- 5 files changed, 78 insertions(+), 22 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc5dde3cc..bb3d607c1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,14 +57,14 @@ Want to fix it? Go for it! Just link the issue in your PR. 2. **Choose your setup method**: - **Docker Setup**: Follow the [Docker Setup Guide](./DOCKER_SETUP.md) - - **Manual Setup**: Follow the [Installation Guide](https://www.surfsense.net/docs/) + - **Manual Setup**: Follow the [Installation Guide](https://www.surfsense.com/docs/) 3. **Configure services**: - Set up PGVector & PostgreSQL - Configure a file ETL service: `Unstructured.io` or `LlamaIndex` - Add API keys for external services -For detailed setup instructions, refer to our [Installation Guide](https://www.surfsense.net/docs/). 
+For detailed setup instructions, refer to our [Installation Guide](https://www.surfsense.com/docs/). ## 🏗️ Project Structure @@ -146,7 +146,7 @@ When contributing, please: Stuck? Need clarification? Here's how to get help: 1. **Check existing issues** - your question might already be answered -2. **Search the docs** - [https://www.surfsense.net/docs/](https://www.surfsense.net/docs/) +2. **Search the docs** - [https://www.surfsense.com/docs/](https://www.surfsense.com/docs/) 3. **Ask in Discord** - [https://discord.gg/ejRNvftDp9](https://discord.gg/ejRNvftDp9) 4. **Create an issue** - if it's a bug or feature request diff --git a/README.md b/README.md index 249ca0f0b..e675c45c3 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ Check out our public roadmap and contribute your ideas or feedback: ### Quick Start with Docker 🐳 > [!TIP] -> For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability. +> For production deployments, use the full [Docker Compose setup](https://www.surfsense.com/docs/docker-installation) which offers more control and scalability. **Linux/macOS:** @@ -215,17 +215,17 @@ SurfSense provides multiple options to get started: - Perfect for evaluation, development, and small deployments - Data persisted via Docker volume -3. **[Docker Compose (Production)](https://www.surfsense.net/docs/docker-installation)** - Full stack deployment with separate services. +3. **[Docker Compose (Production)](https://www.surfsense.com/docs/docker-installation)** - Full stack deployment with separate services. - Includes pgAdmin for database management through a web UI - Supports environment variable customization via `.env` file - Flexible deployment options (full stack or core services only) - Better for production with separate scaling of services -4. 
**[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. +4. **[Manual Installation](https://www.surfsense.com/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. Docker and manual installation guides include detailed OS-specific instructions for Windows, macOS, and Linux. -Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.net/docs/) including: +Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.com/docs/) including: - Auth setup (optional - defaults to LOCAL auth) - **File Processing ETL Service** (optional - defaults to Docling): - Docling (default, local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) diff --git a/README.zh-CN.md b/README.zh-CN.md index 464242a4d..1c3a6b159 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -157,32 +157,88 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7 ## 如何开始? 
+### 使用 Docker 快速开始 🐳 + +> [!TIP] +> 对于生产部署,请使用完整的 [Docker Compose 设置](https://www.surfsense.com/docs/docker-installation),它提供更多控制和可扩展性。 + +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +**使用自定义配置(例如 OpenAI 嵌入):** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +启动后,访问 SurfSense: +- **前端**: [http://localhost:3000](http://localhost:3000) +- **后端 API**: [http://localhost:8000](http://localhost:8000) +- **API 文档**: [http://localhost:8000/docs](http://localhost:8000/docs) + +**常用命令:** + +```bash +docker logs -f surfsense # 查看日志 +docker stop surfsense # 停止 +docker start surfsense # 启动 +docker rm surfsense # 删除(数据保留在卷中) +``` + ### 安装选项 -SurfSense 提供三种入门方式: +SurfSense 提供多种入门方式: 1. **[SurfSense Cloud](https://www.surfsense.com/login)** - 无需任何设置即可试用 SurfSense 的最简单方法。 - 无需安装 - 即时访问所有功能 - 非常适合快速上手 -2. **[Docker 安装(推荐用于自托管)](https://www.surfsense.net/docs/docker-installation)** - 通过容器化所有依赖项,轻松启动和运行 SurfSense。 +2. **快速启动 Docker(上述方法)** - 一条命令即可在本地运行 SurfSense。 + - 一体化镜像,捆绑 PostgreSQL、Redis 和所有服务 + - 非常适合评估、开发和小型部署 + - 数据通过 Docker 卷持久化 + +3. **[Docker Compose(生产环境)](https://www.surfsense.com/docs/docker-installation)** - 使用独立服务进行完整堆栈部署。 - 包含 pgAdmin,通过 Web UI 进行数据库管理 - 支持通过 `.env` 文件自定义环境变量 - 灵活的部署选项(完整堆栈或仅核心服务) - - 无需在环境之间手动编辑配置文件 + - 更适合生产环境,支持独立扩展服务 -3. **[手动安装](https://www.surfsense.net/docs/manual-installation)** - 适合希望对设置有更多控制或需要自定义部署的用户。 +4. 
**[手动安装](https://www.surfsense.com/docs/manual-installation)** - 适合希望对设置有更多控制或需要自定义部署的用户。 Docker 和手动安装指南都包含适用于 Windows、macOS 和 Linux 的详细操作系统特定说明。 -在自托管安装之前,请确保完成[先决条件设置步骤](https://www.surfsense.net/docs/),包括: -- 身份验证设置 -- **文件处理 ETL 服务**(选择其一): +在自托管安装之前,请确保完成[先决条件设置步骤](https://www.surfsense.com/docs/),包括: +- 身份验证设置(可选 - 默认为 LOCAL 身份验证) +- **文件处理 ETL 服务**(可选 - 默认为 Docling): + - Docling(默认,本地处理,无需 API 密钥,支持 PDF、Office 文档、图像、HTML、CSV) - Unstructured.io API 密钥(支持 34+ 种格式) - LlamaIndex API 密钥(增强解析,支持 50+ 种格式) - - Docling(本地处理,无需 API 密钥,支持 PDF、Office 文档、图像、HTML、CSV) -- 其他所需的 API 密钥 +- 其他根据用例需要的 API 密钥 ## 截图 diff --git a/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx b/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx index 2c8a7f286..b6deb1c05 100644 --- a/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx +++ b/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx @@ -103,7 +103,7 @@ const ApiKeyForm = () => {

Need an API key?{" "} Date: Tue, 9 Dec 2025 01:49:27 -0800 Subject: [PATCH 09/10] fix(ux): : update onboarding and form components for improved user experience and clarity --- .../[search_space_id]/onboard/page.tsx | 41 ++++++++++--------- .../components/onboard/setup-llm-step.tsx | 33 ++++++++++++++- .../components/search-space-form.tsx | 2 +- .../settings/model-config-manager.tsx | 31 +++++++++++++- .../contracts/enums/llm-providers.ts | 1 + 5 files changed, 84 insertions(+), 24 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx index 7382429d2..b81310e09 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx @@ -198,18 +198,18 @@ const OnboardPage = () => { className="grid grid-cols-1 md:grid-cols-3 gap-6 mb-10" > router.push(`/dashboard/${searchSpaceId}/team`)} - colorScheme="emerald" - delay={0.7} + buttonText="Start Chatting" + onClick={() => router.push(`/dashboard/${searchSpaceId}/researcher`)} + colorScheme="violet" + delay={0.9} /> { /> router.push(`/dashboard/${searchSpaceId}/researcher`)} - colorScheme="violet" - delay={0.9} + buttonText="Manage Team" + onClick={() => router.push(`/dashboard/${searchSpaceId}/team`)} + colorScheme="emerald" + delay={0.7} /> + {/* Advanced Settings */} diff --git a/surfsense_web/components/onboard/setup-llm-step.tsx b/surfsense_web/components/onboard/setup-llm-step.tsx index 9735061ee..825b8a030 100644 --- a/surfsense_web/components/onboard/setup-llm-step.tsx +++ b/surfsense_web/components/onboard/setup-llm-step.tsx @@ -521,21 +521,50 @@ export function SetupLLMStep({ handleInputChange("api_key", e.target.value)} required /> + {formData.provider === "OLLAMA" && ( +

+ 💡 Ollama doesn't require authentication — enter any value (e.g., "ollama") +

+ )}
handleInputChange("api_base", e.target.value)} /> + {/* Ollama-specific help */} + {formData.provider === "OLLAMA" && ( +
+

💡 Ollama API Base URL Examples:

+
+ + +
+
+ )}
diff --git a/surfsense_web/components/search-space-form.tsx b/surfsense_web/components/search-space-form.tsx index ccb290dc8..d683772ef 100644 --- a/surfsense_web/components/search-space-form.tsx +++ b/surfsense_web/components/search-space-form.tsx @@ -36,7 +36,7 @@ import { cn } from "@/lib/utils"; // Define the form schema with Zod const searchSpaceFormSchema = z.object({ - name: z.string().min(3, "Name must be at least 3 characters"), + name: z.string().min(1, "Name is required"), description: z.string().optional(), }); diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 16bd57e71..234301b7c 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -677,11 +677,16 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { handleInputChange("api_key", e.target.value)} required /> + {formData.provider === "OLLAMA" && ( +

+ 💡 Ollama doesn't require authentication — enter any value (e.g., "ollama") +

+ )}
@@ -718,6 +723,30 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {

)} + {/* Ollama-specific help */} + {formData.provider === "OLLAMA" && ( +
+

💡 Ollama API Base URL Examples:

+
+ + +
+
+ )}
{/* Optional Inference Parameters */} diff --git a/surfsense_web/contracts/enums/llm-providers.ts b/surfsense_web/contracts/enums/llm-providers.ts index cbe33d840..40b7ee2df 100644 --- a/surfsense_web/contracts/enums/llm-providers.ts +++ b/surfsense_web/contracts/enums/llm-providers.ts @@ -109,6 +109,7 @@ export const LLM_PROVIDERS: LLMProvider[] = [ label: "Ollama", example: "ollama/llama3.1, ollama/mistral", description: "Run models locally", + apiBase: "http://localhost:11434", }, { value: "ALIBABA_QWEN", From 4f5c1cf070db347fe42006343bdb2133caa2d898 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 9 Dec 2025 01:50:08 -0800 Subject: [PATCH 10/10] chore: linting --- .../[search_space_id]/onboard/page.tsx | 1 - .../components/onboard/setup-llm-step.tsx | 29 ++++++++++++++----- .../settings/model-config-manager.tsx | 16 +++++++--- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx index b81310e09..1a4d24bd6 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/onboard/page.tsx @@ -241,7 +241,6 @@ const OnboardPage = () => { colorScheme="emerald" delay={0.7} /> - {/* Advanced Settings */} diff --git a/surfsense_web/components/onboard/setup-llm-step.tsx b/surfsense_web/components/onboard/setup-llm-step.tsx index 825b8a030..41cc5be99 100644 --- a/surfsense_web/components/onboard/setup-llm-step.tsx +++ b/surfsense_web/components/onboard/setup-llm-step.tsx @@ -521,14 +521,19 @@ export function SetupLLMStep({ handleInputChange("api_key", e.target.value)} required /> {formData.provider === "OLLAMA" && (

- 💡 Ollama doesn't require authentication — enter any value (e.g., "ollama") + 💡 Ollama doesn't require authentication — enter any value (e.g., + "ollama")

)} @@ -544,22 +549,32 @@ export function SetupLLMStep({ {/* Ollama-specific help */} {formData.provider === "OLLAMA" && (
-

💡 Ollama API Base URL Examples:

+

+ 💡 Ollama API Base URL Examples: +

diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 234301b7c..3bd871135 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -677,7 +677,9 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { handleInputChange("api_key", e.target.value)} required @@ -733,15 +735,21 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { className="flex items-center gap-2 text-xs text-muted-foreground hover:text-foreground transition-colors" onClick={() => handleInputChange("api_base", "http://localhost:11434")} > - http://localhost:11434 + + http://localhost:11434 + — Standard local installation