try: docker all in one image

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-08 20:45:20 -08:00
parent 2cf9fa7a39
commit 5b0d2f82e6
10 changed files with 823 additions and 164 deletions

97
.dockerignore Normal file
View file

@ -0,0 +1,97 @@
# Git
.git
.gitignore
.gitattributes
# Documentation
*.md
!README.md
docs/
CONTRIBUTING.md
CODE_OF_CONDUCT.md
LICENSE
# IDE
.vscode/
.idea/
*.swp
*.swo
.cursor/
# Node
**/node_modules/
**/.next/
**/dist/
**/.turbo/
**/.cache/
**/coverage/
# Python
**/__pycache__/
**/*.pyc
**/*.pyo
**/*.pyd
**/.Python
**/build/
**/develop-eggs/
**/downloads/
**/eggs/
**/.eggs/
**/lib/
**/lib64/
**/parts/
**/sdist/
**/var/
**/wheels/
**/*.egg-info/
**/.installed.cfg
**/*.egg
**/pip-log.txt
**/.tox/
**/.coverage
**/htmlcov/
**/.pytest_cache/
**/nosetests.xml
**/coverage.xml
# Environment
**/.env
**/.env.*
!**/.env.example
**/*.local
# Docker
**/Dockerfile
**/docker-compose*.yml
**/.docker/
# Testing
**/tests/
**/test/
**/__tests__/
**/*.test.*
**/*.spec.*
# Logs
**/*.log
**/logs/
# Temporary files
**/tmp/
**/temp/
**/.tmp/
**/.temp/
# Build artifacts from backend
surfsense_backend/podcasts/
surfsense_backend/temp_audio/
surfsense_backend/*.bak
surfsense_backend/*.dat
surfsense_backend/*.dir
# GitHub
.github/
# Browser extension (not needed for main deployment)
surfsense_browser_extension/

View file

@ -1,75 +0,0 @@
name: Docker Publish
on:
workflow_dispatch:
jobs:
# build_and_push_backend:
# runs-on: ubuntu-latest
# permissions:
# contents: read
# packages: write
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
# - name: Set up QEMU
# uses: docker/setup-qemu-action@v3
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Log in to GitHub Container Registry
# uses: docker/login-action@v3
# with:
# registry: ghcr.io
# username: ${{ github.actor }}
# password: ${{ secrets.GITHUB_TOKEN }}
# - name: Build and push backend image
# uses: docker/build-push-action@v5
# with:
# context: ./surfsense_backend
# file: ./surfsense_backend/Dockerfile
# push: true
# tags: ghcr.io/${{ github.repository_owner }}/surfsense_backend:${{ github.sha }}
# platforms: linux/amd64,linux/arm64
# labels: |
# org.opencontainers.image.source=${{ github.repositoryUrl }}
# org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }}
# org.opencontainers.image.revision=${{ github.sha }}
build_and_push_frontend:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push frontend image
uses: docker/build-push-action@v5
with:
context: ./surfsense_web
file: ./surfsense_web/Dockerfile
push: true
tags: ghcr.io/${{ github.repository_owner }}/surfsense_web:${{ github.sha }}
platforms: linux/amd64,linux/arm64
labels: |
org.opencontainers.image.source=${{ github.repositoryUrl }}
org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }}
org.opencontainers.image.revision=${{ github.sha }}

View file

@ -18,39 +18,30 @@ on:
default: ''
permissions:
contents: write # Needed for pushing tags
packages: write # Needed for pushing docker images to GHCR
contents: write
packages: write
jobs:
tag_release:
runs-on: ubuntu-latest
outputs:
# Define output to pass the tag to the next job
new_tag: ${{ steps.tag_version.outputs.next_version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
# Fetch all history and tags to find the latest SemVer tag
fetch-depth: 0
# Checkout the specific branch if provided, otherwise default
ref: ${{ github.event.inputs.branch }}
# Token needed to push tags back
token: ${{ secrets.GITHUB_TOKEN }}
- name: Get latest SemVer tag and calculate next version
id: tag_version
run: |
# Fetch all tags from remote just in case
git fetch --tags
# Get the latest SemVer tag (handles vX.Y.Z pattern)
# Filters tags, sorts them version-aware, takes the last one
LATEST_TAG=$(git tag --list 'v[0-9]*.[0-9]*.[0-9]*' --sort='v:refname' | tail -n 1)
if [ -z "$LATEST_TAG" ]; then
echo "No previous SemVer tag found. Starting with v0.1.0"
# Determine initial version based on bump type (optional, v0.1.0 is often fine)
case "${{ github.event.inputs.bump_type }}" in
patch|minor)
NEXT_VERSION="v0.1.0"
@ -58,22 +49,18 @@ jobs:
major)
NEXT_VERSION="v1.0.0"
;;
*) # Should not happen due to 'choice' input, but good practice
*)
echo "Invalid bump type: ${{ github.event.inputs.bump_type }}"
exit 1
;;
esac
else
echo "Latest tag found: $LATEST_TAG"
# Remove 'v' prefix for calculation
VERSION=${LATEST_TAG#v}
# Split into parts
MAJOR=$(echo $VERSION | cut -d. -f1)
MINOR=$(echo $VERSION | cut -d. -f2)
PATCH=$(echo $VERSION | cut -d. -f3)
# Bump version based on input
case "${{ github.event.inputs.bump_type }}" in
patch)
PATCH=$((PATCH + 1))
@ -96,12 +83,10 @@ jobs:
fi
echo "Calculated next version: $NEXT_VERSION"
# Set output for subsequent steps
echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT
- name: Create and Push Tag
run: |
# Configure Git user identity for annotated tag (FIX)
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
@ -109,74 +94,23 @@ jobs:
COMMIT_SHA=$(git rev-parse HEAD)
echo "Tagging commit $COMMIT_SHA with $NEXT_TAG"
# Create an annotated tag (recommended) - this requires user.name/email
git tag -a "$NEXT_TAG" -m "Release $NEXT_TAG"
# Push the tag to the remote repository
echo "Pushing tag $NEXT_TAG to origin"
git push origin "$NEXT_TAG"
- name: Verify Tag Push
run: |
echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..."
# Give remote a second to update
sleep 5
git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1)
echo "Tag successfully pushed."
# build_and_push_backend_image:
# runs-on: ubuntu-latest
# needs: tag_release # Depends on the tag being created successfully
# permissions:
# packages: write # Need permission to write to GHCR
# contents: read # Need permission to read repo contents (checkout)
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
# - name: Login to GitHub Container Registry
# uses: docker/login-action@v3
# with:
# registry: ghcr.io
# username: ${{ github.repository_owner }}
# password: ${{ secrets.GITHUB_TOKEN }}
# - name: Set up QEMU
# uses: docker/setup-qemu-action@v3
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Extract metadata (tags, labels) for Docker build
# id: meta
# uses: docker/metadata-action@v5
# with:
# images: ghcr.io/${{ github.repository_owner }}/surfsense_backend
# tags: |
# # Use the tag generated in the previous job
# type=raw,value=${{ needs.tag_release.outputs.new_tag }}
# # Optionally add 'latest' tag if building from the default branch
# type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
# - name: Build and push surfsense backend
# uses: docker/build-push-action@v5
# with:
# context: ./surfsense_backend
# push: true
# tags: ${{ steps.meta.outputs.tags }}
# labels: ${{ steps.meta.outputs.labels }}
# platforms: linux/amd64,linux/arm64
# # Optional: Add build cache for faster builds
# cache-from: type=gha
# cache-to: type=gha,mode=max
build_and_push_ui_image:
build_and_push:
runs-on: ubuntu-latest
needs: tag_release # Depends on the tag being created successfully
needs: tag_release
permissions:
packages: write # Need permission to write to GHCR
contents: read # Need permission to read repo contents (checkout)
packages: write
contents: read
steps:
- name: Checkout code
@ -195,25 +129,23 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Extract metadata (tags, labels) for Docker build
- name: Extract metadata for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository_owner }}/surfsense_ui
images: ghcr.io/${{ github.repository_owner }}/surfsense
tags: |
# Use the tag generated in the previous job
type=raw,value=${{ needs.tag_release.outputs.new_tag }}
# Optionally add 'latest' tag if building from the default branch
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }}
- name: Build and push surfsense UI image
- name: Build and push SurfSense image
uses: docker/build-push-action@v5
with:
context: ./surfsense_web
context: .
file: ./Dockerfile.allinone
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64
# Optional: Add build cache for faster builds
cache-from: type=gha
cache-to: type=gha,mode=max

180
Dockerfile.allinone Normal file
View file

@ -0,0 +1,180 @@
# SurfSense All-in-One Docker Image
# This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend
# Usage: docker run -d -p 3000:3000 -p 8000:8000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest
FROM ubuntu:22.04 AS base
# Prevent interactive prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
# PostgreSQL dependencies
postgresql-14 \
postgresql-contrib-14 \
# Build tools for pgvector
build-essential \
postgresql-server-dev-14 \
git \
# Redis
redis-server \
# Python
python3.11 \
python3.11-venv \
python3.11-dev \
python3-pip \
# Node.js
curl \
ca-certificates \
gnupg \
# Supervisor for process management
supervisor \
# Additional dependencies for backend
gcc \
wget \
unzip \
espeak-ng \
libsndfile1 \
libgl1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
dos2unix \
&& rm -rf /var/lib/apt/lists/*
# Install Node.js 20.x
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& npm install -g pnpm \
&& rm -rf /var/lib/apt/lists/*
# Build and install pgvector
RUN cd /tmp \
&& git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \
&& cd pgvector \
&& make \
&& make install \
&& rm -rf /tmp/pgvector
# Set Python 3.11 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
# Update certificates and install SSL tools
RUN update-ca-certificates
# Create data directories
RUN mkdir -p /data/postgres /data/redis /data/surfsense \
&& chown -R postgres:postgres /data/postgres
# ====================
# Build Frontend
# ====================
WORKDIR /app/frontend
# Copy frontend source
COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./
COPY surfsense_web/source.config.ts ./
COPY surfsense_web/content ./content
# Install frontend dependencies
RUN pnpm install --frozen-lockfile
# Copy rest of frontend
COPY surfsense_web/ ./
# Build frontend with default values (can be overridden at runtime via reverse proxy)
ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE
ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE
RUN pnpm run build
# ====================
# Setup Backend
# ====================
WORKDIR /app/backend
# Copy backend source
COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./
# Install PyTorch based on architecture
RUN if [ "$(uname -m)" = "x86_64" ]; then \
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu; \
else \
pip install --no-cache-dir torch torchvision torchaudio; \
fi
# Install python dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv \
&& uv pip install --system --no-cache-dir -e .
# Set SSL environment variables
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
&& echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \
&& echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh
# Pre-download EasyOCR models
RUN mkdir -p /root/.EasyOCR/model \
&& wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip -O /root/.EasyOCR/model/english_g2.zip || true \
&& wget --no-check-certificate https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip -O /root/.EasyOCR/model/craft_mlt_25k.zip || true \
&& cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o craft_mlt_25k.zip || true)
# Pre-download Docling models (best-effort: network failures must not break the build).
# NOTE: the previous form used python -c "try:\n ..." — inside double quotes the
# shell passes a literal backslash-n, so Python always raised a SyntaxError and
# the models were never cached. A single-statement form avoids newlines entirely.
RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()" || true
# Install Playwright browsers
RUN pip install playwright && playwright install chromium
# Copy backend source
COPY surfsense_backend/ ./
# ====================
# Configuration
# ====================
WORKDIR /app
# Copy supervisor configuration
COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf
# Copy entrypoint script
COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
# PostgreSQL initialization script
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
RUN chmod +x /app/init-postgres.sh
# Environment variables with defaults
ENV POSTGRES_USER=surfsense
ENV POSTGRES_PASSWORD=surfsense
ENV POSTGRES_DB=surfsense
ENV DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense
ENV CELERY_BROKER_URL=redis://localhost:6379/0
ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0
ENV PYTHONPATH=/app/backend
ENV NEXT_FRONTEND_URL=http://localhost:3000
ENV AUTH_TYPE=LOCAL
ENV ETL_SERVICE=DOCLING
ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Data volume
VOLUME ["/data"]
# Expose ports
# 3000 - Frontend
# 8000 - Backend API
EXPOSE 3000 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1
# Run entrypoint
CMD ["/app/entrypoint.sh"]

View file

@ -150,32 +150,84 @@ Check out our public roadmap and contribute your ideas or feedback:
## How to get started?
### Quick Start with Docker 🐳
> [!TIP]
> For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability.
**Quick Start:**
```bash
docker run -d -p 3000:3000 -p 8000:8000 \
-v surfsense-data:/data \
-e SECRET_KEY=$(openssl rand -hex 32) \
--name surfsense \
--restart unless-stopped \
ghcr.io/modsetter/surfsense:latest
```
**With Custom Embedding Model (e.g., OpenAI):**
```bash
docker run -d -p 3000:3000 -p 8000:8000 \
-v surfsense-data:/data \
-e SECRET_KEY=$(openssl rand -hex 32) \
-e EMBEDDING_MODEL=openai://text-embedding-ada-002 \
-e OPENAI_API_KEY=your_openai_api_key \
--name surfsense \
--restart unless-stopped \
ghcr.io/modsetter/surfsense:latest
```
**Using Docker Compose (Recommended for easier management):**
```bash
# Download the quick start compose file
curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml
# Create .env file with your secret key
echo "SECRET_KEY=$(openssl rand -hex 32)" > .env
# Start SurfSense
docker compose up -d
```
After starting, access SurfSense at:
- **Frontend**: [http://localhost:3000](http://localhost:3000)
- **Backend API**: [http://localhost:8000](http://localhost:8000)
- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs)
### Installation Options
SurfSense provides three options to get started:
SurfSense provides multiple options to get started:
1. **[SurfSense Cloud](https://www.surfsense.com/login)** - The easiest way to try SurfSense without any setup.
- No installation required
- Instant access to all features
- Perfect for getting started quickly
2. **[Docker Installation (Recommended for Self-Hosting)](https://www.surfsense.net/docs/docker-installation)** - Easy way to get SurfSense up and running with all dependencies containerized.
2. **Quick Start Docker (Above)** - Single command to get SurfSense running locally.
- All-in-one image with PostgreSQL, Redis, and all services bundled
- Perfect for evaluation, development, and small deployments
- Data persisted via Docker volume
3. **[Docker Compose (Production)](https://www.surfsense.net/docs/docker-installation)** - Full stack deployment with separate services.
- Includes pgAdmin for database management through a web UI
- Supports environment variable customization via `.env` file
- Flexible deployment options (full stack or core services only)
- No need to manually edit configuration files between environments
- Better for production with separate scaling of services
3. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment.
4. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment.
Docker and manual installation guides include detailed OS-specific instructions for Windows, macOS, and Linux.
Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.net/docs/) including:
- Auth setup
- **File Processing ETL Service** (choose one):
- Auth setup (optional - defaults to LOCAL auth)
- **File Processing ETL Service** (optional - defaults to Docling):
- Docling (default, local processing, no API key required, supports PDF, Office docs, images, HTML, CSV)
- Unstructured.io API key (supports 34+ formats)
- LlamaIndex API key (enhanced parsing, supports 50+ formats)
- Docling (local processing, no API key required, supports PDF, Office docs, images, HTML, CSV)
- Other required API keys
- Other API keys as needed for your use case
## Screenshots

View file

@ -0,0 +1,82 @@
# SurfSense Quick Start Docker Compose
#
# This is a simplified docker-compose for quick local deployment using pre-built images.
# For production or customized deployments, use the main docker-compose.yml
#
# Usage:
# 1. Create a .env file with your required configuration (see below)
# 2. Run: docker compose -f docker-compose.quickstart.yml up -d
# 3. Access SurfSense at http://localhost:3000
#
# Required Environment Variables:
# - SECRET_KEY: JWT secret key (generate with: openssl rand -hex 32)
#
# Optional Environment Variables:
# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2)
# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING)
# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro)
# - STT_SERVICE: Speech-to-text service (default: local/base)
# - FIRECRAWL_API_KEY: For web crawling features
version: "3.8"
services:
# All-in-one SurfSense container
surfsense:
image: ghcr.io/modsetter/surfsense:latest
container_name: surfsense
ports:
- "${FRONTEND_PORT:-3000}:3000"
- "${BACKEND_PORT:-8000}:8000"
volumes:
- surfsense-data:/data
environment:
# Required
- SECRET_KEY=${SECRET_KEY:-change-me-in-production}
# Auth Configuration
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
# AI/ML Configuration
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
- RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE}
- RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-}
- RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-}
# Document Processing
- ETL_SERVICE=${ETL_SERVICE:-DOCLING}
- UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
- LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-}
# Audio Services
- TTS_SERVICE=${TTS_SERVICE:-local/kokoro}
- TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-}
- STT_SERVICE=${STT_SERVICE:-local/base}
- STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-}
# Web Crawling
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-}
# Optional Features
- REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
- SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m}
# LangSmith Observability (optional)
- LANGSMITH_TRACING=${LANGSMITH_TRACING:-false}
- LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-}
- LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-}
- LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-}
restart: unless-stopped
healthcheck:
  # Use CMD-SHELL so "&&" is interpreted by a shell. In the exec form
  # ["CMD", "curl", ..., "&&", ...] the "&&" is passed as a literal argument
  # to curl and the combined check never runs as intended.
  test: ["CMD-SHELL", "curl -f http://localhost:3000 && curl -f http://localhost:8000/docs || exit 1"]
  interval: 30s
  timeout: 10s
  retries: 3
  start_period: 120s
volumes:
surfsense-data:
name: surfsense-data

View file

@ -0,0 +1,115 @@
#!/bin/bash
set -e
echo "==========================================="
echo " 🏄 SurfSense All-in-One Container"
echo "==========================================="
# Create log directory
mkdir -p /var/log/supervisor
# ================================================
# Initialize PostgreSQL if needed
# ================================================
if [ ! -f /data/postgres/PG_VERSION ]; then
echo "📦 Initializing PostgreSQL database..."
# Initialize PostgreSQL data directory
chown -R postgres:postgres /data/postgres
chmod 700 /data/postgres
su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres"
# Configure PostgreSQL for connections
echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf
echo "local all all trust" >> /data/postgres/pg_hba.conf
echo "listen_addresses='*'" >> /data/postgres/postgresql.conf
# Start PostgreSQL temporarily to create database and user
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_init.log start"
# Wait for PostgreSQL to be ready
sleep 5
# Create user and database
su - postgres -c "psql -c \"CREATE USER ${POSTGRES_USER:-surfsense} WITH PASSWORD '${POSTGRES_PASSWORD:-surfsense}' SUPERUSER;\""
su - postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB:-surfsense} OWNER ${POSTGRES_USER:-surfsense};\""
# Enable pgvector extension
su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} -c 'CREATE EXTENSION IF NOT EXISTS vector;'"
# Stop temporary PostgreSQL
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
echo "✅ PostgreSQL initialized successfully"
else
echo "✅ PostgreSQL data directory already exists"
fi
# ================================================
# Initialize Redis data directory
# ================================================
mkdir -p /data/redis
chmod 755 /data/redis
echo "✅ Redis data directory ready"
# ================================================
# Copy frontend build to runtime location
# ================================================
# Flatten the Next.js standalone output into /app/frontend so supervisor can
# run `node server.js` from there. Using `standalone/.` (instead of
# `standalone/*`) also copies dotfile entries such as standalone/.next.
# The build's static assets already live at /app/frontend/.next/static, so no
# separate copy is needed — the previous `cp -r .next/static .next/static`
# copied the directory into itself, producing a bogus .next/static/static.
if [ -d /app/frontend/.next/standalone ]; then
    cp -a /app/frontend/.next/standalone/. /app/frontend/ 2>/dev/null || true
fi
# ================================================
# Run database migrations
# ================================================
run_migrations() {
echo "🔄 Running database migrations..."
# Start PostgreSQL temporarily for migrations
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_migrate.log start"
sleep 5
# Start Redis temporarily for migrations (some might need it)
redis-server --dir /data/redis --daemonize yes
sleep 2
# Run alembic migrations
cd /app/backend
alembic upgrade head || echo "⚠️ Migrations may have already been applied"
# Stop temporary services
redis-cli shutdown || true
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
echo "✅ Database migrations complete"
}
# Run migrations on first start or when explicitly requested
if [ ! -f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then
run_migrations
touch /data/.migrations_run
fi
# ================================================
# Environment Variables Info
# ================================================
echo ""
echo "==========================================="
echo " 📋 Configuration"
echo "==========================================="
echo " Frontend URL: http://localhost:3000"
echo " Backend API: http://localhost:8000"
echo " API Docs: http://localhost:8000/docs"
echo " Auth Type: ${AUTH_TYPE:-LOCAL}"
echo " ETL Service: ${ETL_SERVICE:-DOCLING}"
echo "==========================================="
echo ""
# ================================================
# Start Supervisor (manages all services)
# ================================================
echo "🚀 Starting all services..."
exec /usr/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf

View file

@ -0,0 +1,54 @@
#!/bin/bash
# PostgreSQL initialization script for SurfSense
# This script is called during container startup if the database needs initialization
set -e
PGDATA=${PGDATA:-/data/postgres}
POSTGRES_USER=${POSTGRES_USER:-surfsense}
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-surfsense}
POSTGRES_DB=${POSTGRES_DB:-surfsense}
echo "Initializing PostgreSQL..."
# Check if PostgreSQL is already initialized
if [ -f "$PGDATA/PG_VERSION" ]; then
echo "PostgreSQL data directory already exists. Skipping initialization."
exit 0
fi
# Initialize the database cluster
/usr/lib/postgresql/14/bin/initdb -D "$PGDATA" --username=postgres
# Configure PostgreSQL
cat >> "$PGDATA/postgresql.conf" << EOF
listen_addresses = '*'
max_connections = 100
shared_buffers = 128MB
EOF
cat >> "$PGDATA/pg_hba.conf" << EOF
# Allow connections from anywhere with password
host all all 0.0.0.0/0 md5
host all all ::0/0 md5
EOF
# Start PostgreSQL temporarily
/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" -l /tmp/postgres_init.log start
# Wait for PostgreSQL to start
sleep 3
# Create user and database
psql -U postgres << EOF
CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER;
CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER;
\c $POSTGRES_DB
CREATE EXTENSION IF NOT EXISTS vector;
EOF
echo "PostgreSQL initialized successfully."
# Stop PostgreSQL (supervisor will start it)
/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" stop

View file

@ -0,0 +1,94 @@
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
childlogdir=/var/log/supervisor
user=root
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
# PostgreSQL
[program:postgresql]
command=/usr/lib/postgresql/14/bin/postgres -D /data/postgres
user=postgres
autostart=true
autorestart=true
priority=10
stdout_logfile=/var/log/supervisor/postgresql.log
stderr_logfile=/var/log/supervisor/postgresql-error.log
environment=PGDATA="/data/postgres"
# Redis
[program:redis]
command=/usr/bin/redis-server --dir /data/redis --appendonly yes
autostart=true
autorestart=true
priority=20
stdout_logfile=/var/log/supervisor/redis.log
stderr_logfile=/var/log/supervisor/redis-error.log
# Backend API
[program:backend]
command=python main.py
directory=/app/backend
autostart=true
autorestart=true
priority=30
startsecs=10
startretries=3
stdout_logfile=/var/log/supervisor/backend.log
stderr_logfile=/var/log/supervisor/backend-error.log
environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1"
# Celery Worker
[program:celery-worker]
command=celery -A app.celery_app worker --loglevel=info --concurrency=2 --pool=solo
directory=/app/backend
autostart=true
autorestart=true
priority=40
startsecs=15
startretries=3
stdout_logfile=/var/log/supervisor/celery-worker.log
stderr_logfile=/var/log/supervisor/celery-worker-error.log
environment=PYTHONPATH="/app/backend"
# Celery Beat (scheduler)
[program:celery-beat]
command=celery -A app.celery_app beat --loglevel=info
directory=/app/backend
autostart=true
autorestart=true
priority=50
startsecs=20
startretries=3
stdout_logfile=/var/log/supervisor/celery-beat.log
stderr_logfile=/var/log/supervisor/celery-beat-error.log
environment=PYTHONPATH="/app/backend"
# Frontend
[program:frontend]
command=node server.js
directory=/app/frontend
autostart=true
autorestart=true
priority=60
startsecs=5
startretries=3
stdout_logfile=/var/log/supervisor/frontend.log
stderr_logfile=/var/log/supervisor/frontend-error.log
environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0"
# Process Groups
[group:surfsense]
programs=postgresql,redis,backend,celery-worker,celery-beat,frontend
priority=999

View file

@ -8,7 +8,135 @@ full: true
# Docker Installation
This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment.
This guide explains how to run SurfSense using Docker, with options ranging from quick single-command deployment to full production setups.
## Quick Start with Docker 🐳
Get SurfSense running in seconds with a single command:
<Callout type="info">
The all-in-one Docker image bundles PostgreSQL (with pgvector), Redis, and all SurfSense services. Perfect for quick evaluation and development.
</Callout>
<Callout type="warn">
Make sure to include the `-v surfsense-data:/data` volume flag in your Docker command. This ensures your database and files are properly persisted.
</Callout>
### One-Line Installation
**Linux/macOS:**
```bash
docker run -d -p 3000:3000 -p 8000:8000 \
-v surfsense-data:/data \
-e SECRET_KEY=$(openssl rand -hex 32) \
--name surfsense \
--restart unless-stopped \
ghcr.io/modsetter/surfsense:latest
```
**Windows (PowerShell):**
```powershell
$secretKey = -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | ForEach-Object {[char]$_})
docker run -d -p 3000:3000 -p 8000:8000 `
-v surfsense-data:/data `
-e SECRET_KEY=$secretKey `
--name surfsense `
--restart unless-stopped `
ghcr.io/modsetter/surfsense:latest
```
### With Custom Configuration
**Using OpenAI Embeddings:**
```bash
docker run -d -p 3000:3000 -p 8000:8000 \
-v surfsense-data:/data \
-e SECRET_KEY=$(openssl rand -hex 32) \
-e EMBEDDING_MODEL=openai://text-embedding-ada-002 \
-e OPENAI_API_KEY=your_openai_api_key \
--name surfsense \
--restart unless-stopped \
ghcr.io/modsetter/surfsense:latest
```
**With Google OAuth:**
```bash
docker run -d -p 3000:3000 -p 8000:8000 \
-v surfsense-data:/data \
-e SECRET_KEY=$(openssl rand -hex 32) \
-e AUTH_TYPE=GOOGLE \
-e GOOGLE_OAUTH_CLIENT_ID=your_client_id \
-e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \
--name surfsense \
--restart unless-stopped \
ghcr.io/modsetter/surfsense:latest
```
### Quick Start with Docker Compose
For easier management with environment files:
```bash
# Download the quick start compose file
curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml
# Create .env file
cat > .env << EOF
SECRET_KEY=$(openssl rand -hex 32)
# Add other configuration as needed
# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# ETL_SERVICE=DOCLING
EOF
# Start SurfSense
docker compose up -d
```
After starting, access SurfSense at:
- **Frontend**: [http://localhost:3000](http://localhost:3000)
- **Backend API**: [http://localhost:8000](http://localhost:8000)
- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs)
### Quick Start Environment Variables
| Variable | Description | Default |
|----------|-------------|---------|
| SECRET_KEY | JWT secret key (required) | - |
| AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL |
| EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 |
| ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING |
| TTS_SERVICE | Text-to-speech for podcasts | local/kokoro |
| STT_SERVICE | Speech-to-text for audio | local/base |
| REGISTRATION_ENABLED | Allow new user registration | TRUE |
### Useful Commands
```bash
# View logs
docker logs -f surfsense
# Stop SurfSense
docker stop surfsense
# Start SurfSense
docker start surfsense
# Remove container (data preserved in volume)
docker rm surfsense
# Remove container AND data
docker rm surfsense && docker volume rm surfsense-data
```
---
## Full Docker Compose Setup (Production)
For production deployments with separate services and more control, use the full Docker Compose setup below.
## Prerequisites