feat: add Docker configuration files and installation script for SurfSense

This commit is contained in:
Anish Sarkar 2026-02-24 22:45:37 +05:30
parent 0fb6caa01f
commit ce1f8c872f
11 changed files with 762 additions and 98 deletions

194
docker/.env.example Normal file
View file

@ -0,0 +1,194 @@
# ==============================================================================
# SurfSense Docker Configuration
# ==============================================================================
# Only variables YOU need to set are in this file.
# Database, Redis, and internal service wiring are handled automatically.
# ==============================================================================
# SurfSense version (pin to a specific version like "0.0.13.1" or use "latest")
SURFSENSE_VERSION=latest
# ------------------------------------------------------------------------------
# Core Settings
# ------------------------------------------------------------------------------
# REQUIRED: Generate a secret key with: openssl rand -base64 32
SECRET_KEY=replace_me_with_a_random_string
# Auth type: LOCAL (email/password) or GOOGLE (OAuth)
AUTH_TYPE=LOCAL
# Allow new user registrations (TRUE or FALSE)
# REGISTRATION_ENABLED=TRUE
# Document parsing service: DOCLING, UNSTRUCTURED, or LLAMACLOUD
ETL_SERVICE=DOCLING
# Embedding model for vector search
# Local: sentence-transformers/all-MiniLM-L6-v2
# OpenAI: openai://text-embedding-ada-002 (set OPENAI_API_KEY below)
# Cohere: cohere://embed-english-light-v3.0 (set COHERE_API_KEY below)
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# ------------------------------------------------------------------------------
# Ports (change these to avoid host conflicts — everything auto-derives)
# ------------------------------------------------------------------------------
# BACKEND_PORT=8000
# FRONTEND_PORT=3000
# ELECTRIC_PORT=5133
# Frontend URL used by backend for CORS and OAuth redirects.
# Auto-derived from FRONTEND_PORT for localhost. Set explicitly for reverse proxy.
# NEXT_FRONTEND_URL=http://localhost:3000
# Backend URL for OAuth callback redirects (set when behind a reverse proxy)
# BACKEND_URL=https://api.yourdomain.com
# ------------------------------------------------------------------------------
# Frontend URL Overrides (reverse proxy / custom domains)
# ------------------------------------------------------------------------------
# These are auto-derived from the port settings above for localhost deployments.
# You only need to set these explicitly when using a reverse proxy with real
# domains (e.g. Caddy, Nginx, Cloudflare Tunnel).
#
# NEXT_PUBLIC_FASTAPI_BACKEND_URL=https://api.yourdomain.com
# NEXT_PUBLIC_ELECTRIC_URL=https://electric.yourdomain.com
# ------------------------------------------------------------------------------
# Database (defaults work out of the box, change for security)
# ------------------------------------------------------------------------------
# DB_PASSWORD=surfsense
# ------------------------------------------------------------------------------
# TTS & STT (Text-to-Speech / Speech-to-Text)
# ------------------------------------------------------------------------------
# Local Kokoro TTS (default) or LiteLLM provider
TTS_SERVICE=local/kokoro
# TTS_SERVICE_API_KEY=
# TTS_SERVICE_API_BASE=
# Local Faster-Whisper STT: local/MODEL_SIZE (tiny, base, small, medium, large-v3)
STT_SERVICE=local/base
# Or use LiteLLM: openai/whisper-1
# STT_SERVICE_API_KEY=
# STT_SERVICE_API_BASE=
# ------------------------------------------------------------------------------
# Rerankers (optional, disabled by default)
# ------------------------------------------------------------------------------
# RERANKERS_ENABLED=TRUE
# RERANKERS_MODEL_NAME=ms-marco-MiniLM-L-12-v2
# RERANKERS_MODEL_TYPE=flashrank
# ------------------------------------------------------------------------------
# Google OAuth (only if AUTH_TYPE=GOOGLE)
# ------------------------------------------------------------------------------
# GOOGLE_OAUTH_CLIENT_ID=
# GOOGLE_OAUTH_CLIENT_SECRET=
# ------------------------------------------------------------------------------
# Connector OAuth Keys (uncomment connectors you want to use)
# ------------------------------------------------------------------------------
# -- Google Connectors --
# GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback
# GOOGLE_GMAIL_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/gmail/connector/callback
# GOOGLE_DRIVE_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/drive/connector/callback
# -- Notion --
# NOTION_CLIENT_ID=
# NOTION_CLIENT_SECRET=
# NOTION_REDIRECT_URI=http://localhost:8000/api/v1/auth/notion/connector/callback
# -- Slack --
# SLACK_CLIENT_ID=
# SLACK_CLIENT_SECRET=
# SLACK_REDIRECT_URI=http://localhost:8000/api/v1/auth/slack/connector/callback
# -- Discord --
# DISCORD_CLIENT_ID=
# DISCORD_CLIENT_SECRET=
# DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
# DISCORD_BOT_TOKEN=
# -- Atlassian (Jira & Confluence) --
# ATLASSIAN_CLIENT_ID=
# ATLASSIAN_CLIENT_SECRET=
# JIRA_REDIRECT_URI=http://localhost:8000/api/v1/auth/jira/connector/callback
# CONFLUENCE_REDIRECT_URI=http://localhost:8000/api/v1/auth/confluence/connector/callback
# -- Linear --
# LINEAR_CLIENT_ID=
# LINEAR_CLIENT_SECRET=
# LINEAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/linear/connector/callback
# -- ClickUp --
# CLICKUP_CLIENT_ID=
# CLICKUP_CLIENT_SECRET=
# CLICKUP_REDIRECT_URI=http://localhost:8000/api/v1/auth/clickup/connector/callback
# -- Airtable --
# AIRTABLE_CLIENT_ID=
# AIRTABLE_CLIENT_SECRET=
# AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callback
# -- Microsoft Teams --
# TEAMS_CLIENT_ID=
# TEAMS_CLIENT_SECRET=
# TEAMS_REDIRECT_URI=http://localhost:8000/api/v1/auth/teams/connector/callback
# -- Composio --
# COMPOSIO_API_KEY=
# COMPOSIO_ENABLED=TRUE
# COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
# ------------------------------------------------------------------------------
# External API Keys (optional)
# ------------------------------------------------------------------------------
# Firecrawl (web scraping)
# FIRECRAWL_API_KEY=
# Unstructured (if ETL_SERVICE=UNSTRUCTURED)
# UNSTRUCTURED_API_KEY=
# LlamaCloud (if ETL_SERVICE=LLAMACLOUD)
# LLAMA_CLOUD_API_KEY=
# ------------------------------------------------------------------------------
# Observability (optional)
# ------------------------------------------------------------------------------
# LANGSMITH_TRACING=true
# LANGSMITH_ENDPOINT=https://api.smith.langchain.com
# LANGSMITH_API_KEY=
# LANGSMITH_PROJECT=surfsense
# ------------------------------------------------------------------------------
# Advanced (optional)
# ------------------------------------------------------------------------------
# Periodic connector sync interval (default: 5m)
# SCHEDULE_CHECKER_INTERVAL=5m
# JWT token lifetimes
# ACCESS_TOKEN_LIFETIME_SECONDS=86400
# REFRESH_TOKEN_LIFETIME_SECONDS=1209600
# Pages limit per user for ETL (default: unlimited)
# PAGES_LIMIT=500
# Connector indexing lock TTL in seconds (default: 28800 = 8 hours)
# CONNECTOR_INDEXING_LOCK_TTL_SECONDS=28800
# Residential proxy for web crawling
# RESIDENTIAL_PROXY_USERNAME=
# RESIDENTIAL_PROXY_PASSWORD=
# RESIDENTIAL_PROXY_HOSTNAME=
# RESIDENTIAL_PROXY_LOCATION=
# RESIDENTIAL_PROXY_TYPE=1

View file

@ -0,0 +1,129 @@
# =============================================================================
# SurfSense — Development Docker Compose
# =============================================================================
# Usage (from repo root):
# docker compose -f docker/docker-compose.dev.yml up --build
#
# This file builds from source and includes dev tools like pgAdmin.
# For production with prebuilt images, use docker/docker-compose.yml instead.
# =============================================================================
name: surfsense
services:
db:
image: pgvector/pgvector:pg17
ports:
- "${POSTGRES_PORT:-5432}:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./postgresql.conf:/etc/postgresql/postgresql.conf:ro
- ./scripts/init-electric-user.sh:/docker-entrypoint-initdb.d/init-electric-user.sh:ro
environment:
- POSTGRES_USER=${POSTGRES_USER:-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
- POSTGRES_DB=${POSTGRES_DB:-surfsense}
- ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
- ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
command: postgres -c config_file=/etc/postgresql/postgresql.conf
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-surfsense}"]
interval: 10s
timeout: 5s
retries: 5
pgadmin:
image: dpage/pgadmin4
ports:
- "${PGADMIN_PORT:-5050}:80"
environment:
- PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL:-admin@surfsense.com}
- PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD:-surfsense}
volumes:
- pgadmin_data:/var/lib/pgadmin
depends_on:
- db
redis:
image: redis:7-alpine
ports:
- "${REDIS_PORT:-6379}:6379"
volumes:
- redis_data:/data
command: redis-server --appendonly yes
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
backend:
build: ../surfsense_backend
ports:
- "${BACKEND_PORT:-8000}:8000"
volumes:
- ../surfsense_backend/app:/app/app
- shared_temp:/shared_tmp
env_file:
- ../surfsense_backend/.env
environment:
- DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense}
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/0
- REDIS_APP_URL=redis://redis:6379/0
- CELERY_TASK_DEFAULT_QUEUE=surfsense
- PYTHONPATH=/app
- UVICORN_LOOP=asyncio
- UNSTRUCTURED_HAS_PATCHED_LOOP=1
- LANGCHAIN_TRACING_V2=false
- LANGSMITH_TRACING=false
- ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
- ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
electric:
image: electricsql/electric:latest
ports:
- "${ELECTRIC_PORT:-5133}:3000"
# depends_on:
# - db
environment:
- DATABASE_URL=${ELECTRIC_DATABASE_URL:-postgresql://${ELECTRIC_DB_USER:-electric}:${ELECTRIC_DB_PASSWORD:-electric_password}@${POSTGRES_HOST:-db}:5432/${POSTGRES_DB:-surfsense}?sslmode=disable}
- ELECTRIC_INSECURE=true
- ELECTRIC_WRITE_TO_PG_MODE=direct
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/v1/health"]
interval: 10s
timeout: 5s
retries: 5
frontend:
build:
context: ../surfsense_web
args:
NEXT_PUBLIC_FASTAPI_BACKEND_URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:8000}
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE:-LOCAL}
NEXT_PUBLIC_ETL_SERVICE: ${NEXT_PUBLIC_ETL_SERVICE:-DOCLING}
NEXT_PUBLIC_ELECTRIC_URL: ${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:5133}
NEXT_PUBLIC_ELECTRIC_AUTH_MODE: ${NEXT_PUBLIC_ELECTRIC_AUTH_MODE:-insecure}
NEXT_PUBLIC_DEPLOYMENT_MODE: ${NEXT_PUBLIC_DEPLOYMENT_MODE:-self-hosted}
ports:
- "${FRONTEND_PORT:-3000}:3000"
env_file:
- ../surfsense_web/.env
depends_on:
- backend
- electric
volumes:
postgres_data:
pgadmin_data:
redis_data:
shared_temp:

110
docker/docker-compose.yml Normal file
View file

@ -0,0 +1,110 @@
# =============================================================================
# SurfSense — Production Docker Compose
# Docs: https://docs.surfsense.com/docs/docker-installation
# =============================================================================
# Usage:
# 1. Copy .env.example to .env and edit the required values
# 2. docker compose up -d
# =============================================================================
name: surfsense
services:
db:
image: pgvector/pgvector:pg17
volumes:
- postgres_data:/var/lib/postgresql/data
- ./postgresql.conf:/etc/postgresql/postgresql.conf:ro
- ./scripts/init-electric-user.sh:/docker-entrypoint-initdb.d/init-electric-user.sh:ro
environment:
POSTGRES_USER: surfsense
POSTGRES_PASSWORD: ${DB_PASSWORD:-surfsense}
POSTGRES_DB: surfsense
ELECTRIC_DB_USER: electric
ELECTRIC_DB_PASSWORD: electric_password
command: postgres -c config_file=/etc/postgresql/postgresql.conf
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U surfsense -d surfsense"]
interval: 10s
timeout: 5s
retries: 5
redis:
image: redis:7-alpine
volumes:
- redis_data:/data
command: redis-server --appendonly yes
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
backend:
image: ghcr.io/modsetter/surfsense_backend:${SURFSENSE_VERSION:-latest}
ports:
- "${BACKEND_PORT:-8000}:8000"
volumes:
- shared_temp:/shared_tmp
env_file:
- .env
environment:
DATABASE_URL: postgresql+asyncpg://surfsense:${DB_PASSWORD:-surfsense}@db:5432/surfsense
CELERY_BROKER_URL: redis://redis:6379/0
CELERY_RESULT_BACKEND: redis://redis:6379/0
REDIS_APP_URL: redis://redis:6379/0
CELERY_TASK_DEFAULT_QUEUE: surfsense
PYTHONPATH: /app
UVICORN_LOOP: asyncio
UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
ELECTRIC_DB_USER: electric
ELECTRIC_DB_PASSWORD: electric_password
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3000}}
SERVICE_ROLE: all
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
restart: unless-stopped
electric:
image: electricsql/electric:latest
ports:
- "${ELECTRIC_PORT:-5133}:3000"
environment:
DATABASE_URL: postgresql://electric:electric_password@db:5432/surfsense?sslmode=disable
ELECTRIC_INSECURE: "true"
ELECTRIC_WRITE_TO_PG_MODE: direct
restart: unless-stopped
depends_on:
db:
condition: service_healthy
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/v1/health"]
interval: 10s
timeout: 5s
retries: 5
frontend:
image: ghcr.io/modsetter/surfsense_web:${SURFSENSE_VERSION:-latest}
ports:
- "${FRONTEND_PORT:-3000}:3000"
environment:
NEXT_PUBLIC_FASTAPI_BACKEND_URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:${BACKEND_PORT:-8000}}
NEXT_PUBLIC_ELECTRIC_URL: ${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:${ELECTRIC_PORT:-5133}}
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${AUTH_TYPE:-LOCAL}
NEXT_PUBLIC_ETL_SERVICE: ${ETL_SERVICE:-DOCLING}
NEXT_PUBLIC_DEPLOYMENT_MODE: ${DEPLOYMENT_MODE:-self-hosted}
NEXT_PUBLIC_ELECTRIC_AUTH_MODE: ${NEXT_PUBLIC_ELECTRIC_AUTH_MODE:-insecure}
depends_on:
- backend
- electric
restart: unless-stopped
volumes:
postgres_data:
redis_data:
shared_temp:

20
docker/postgresql.conf Normal file
View file

@ -0,0 +1,20 @@
# PostgreSQL configuration for Electric SQL
# This file is mounted into the PostgreSQL container
listen_addresses = '*'
max_connections = 200
shared_buffers = 256MB
# Enable logical replication (required for Electric SQL)
wal_level = logical
max_replication_slots = 10
max_wal_senders = 10
# Performance settings
checkpoint_timeout = 10min
max_wal_size = 1GB
min_wal_size = 80MB
# Logging (optional, for debugging)
# log_statement = 'all'
# log_replication_commands = on

View file

@ -0,0 +1,38 @@
#!/bin/sh
# Creates the Electric SQL replication user on first DB initialization.
# Idempotent — safe to run alongside Alembic migration 66.
set -e
ELECTRIC_DB_USER="${ELECTRIC_DB_USER:-electric}"
ELECTRIC_DB_PASSWORD="${ELECTRIC_DB_PASSWORD:-electric_password}"
echo "Creating Electric SQL replication user: $ELECTRIC_DB_USER"
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
DO \$\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '$ELECTRIC_DB_USER') THEN
CREATE USER $ELECTRIC_DB_USER WITH REPLICATION PASSWORD '$ELECTRIC_DB_PASSWORD';
END IF;
END
\$\$;
GRANT CONNECT ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
GRANT CREATE ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
GRANT USAGE ON SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO $ELECTRIC_DB_USER;
DO \$\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_publication WHERE pubname = 'electric_publication_default') THEN
CREATE PUBLICATION electric_publication_default;
END IF;
END
\$\$;
EOSQL
echo "Electric SQL user '$ELECTRIC_DB_USER' and publication created successfully"

92
docker/scripts/install.sh Normal file
View file

@ -0,0 +1,92 @@
#!/usr/bin/env bash
# =============================================================================
# SurfSense — One-line Install Script
# Usage: curl -fsSL https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker/scripts/install.sh | bash
# =============================================================================
set -euo pipefail
REPO_RAW="https://raw.githubusercontent.com/MODSetter/SurfSense/main"
INSTALL_DIR="./surfsense"
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
info() { printf "${GREEN}[SurfSense]${NC} %s\n" "$1"; }
warn() { printf "${YELLOW}[SurfSense]${NC} %s\n" "$1"; }
error() { printf "${RED}[SurfSense]${NC} %s\n" "$1" >&2; exit 1; }
# ── Pre-flight checks ───────────────────────────────────────────────────────
command -v docker >/dev/null 2>&1 || error "Docker is not installed. Please install Docker first: https://docs.docker.com/get-docker/"
if docker compose version >/dev/null 2>&1; then
DC="docker compose"
elif command -v docker-compose >/dev/null 2>&1; then
DC="docker-compose"
else
error "Docker Compose is not installed. Please install Docker Compose: https://docs.docker.com/compose/install/"
fi
# ── Download files ───────────────────────────────────────────────────────────
info "Creating installation directory: ${INSTALL_DIR}"
mkdir -p "${INSTALL_DIR}"
FILES=(
"docker/docker-compose.yml:docker-compose.yml"
"docker/.env.example:.env.example"
"docker/postgresql.conf:postgresql.conf"
"docker/scripts/init-electric-user.sh:init-electric-user.sh"
)
for entry in "${FILES[@]}"; do
src="${entry%%:*}"
dest="${entry##*:}"
info "Downloading ${dest}..."
curl -fsSL "${REPO_RAW}/${src}" -o "${INSTALL_DIR}/${dest}" || error "Failed to download ${src}"
done
chmod +x "${INSTALL_DIR}/init-electric-user.sh"
# ── Set up .env ──────────────────────────────────────────────────────────────
if [ ! -f "${INSTALL_DIR}/.env" ]; then
cp "${INSTALL_DIR}/.env.example" "${INSTALL_DIR}/.env"
SECRET_KEY=$(openssl rand -base64 32 2>/dev/null || head -c 32 /dev/urandom | base64)
if [[ "$OSTYPE" == "darwin"* ]]; then
sed -i '' "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
else
sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
fi
info "Generated random SECRET_KEY in .env"
else
warn ".env already exists — skipping (your existing config is preserved)"
fi
# ── Start containers ─────────────────────────────────────────────────────────
info "Starting SurfSense..."
cd "${INSTALL_DIR}"
${DC} up -d
echo ""
info "=========================================="
info " SurfSense is starting up!"
info "=========================================="
info ""
info " Frontend: http://localhost:3000"
info " Backend: http://localhost:8000"
info " API Docs: http://localhost:8000/docs"
info ""
info " Config: ${INSTALL_DIR}/.env"
info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f"
info " Stop: cd ${INSTALL_DIR} && ${DC} down"
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d"
info ""
warn " First startup may take a few minutes while images are pulled."
warn " Edit .env to configure OAuth connectors, API keys, etc."
info "=========================================="