chore: remove legacy Docker configuration files and initialization scripts for SurfSense

This commit is contained in:
Anish Sarkar 2026-02-24 23:01:38 +05:30
parent ce1f8c872f
commit 68be0d8675
8 changed files with 0 additions and 1047 deletions

View file

@ -1,285 +0,0 @@
# SurfSense All-in-One Docker Image
# This image bundles PostgreSQL+pgvector, Redis, Electric SQL, Backend, and Frontend
# Usage: docker run -d -p 3000:3000 -p 8000:8000 -p 5133:5133 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest
#
# Included Services (all run locally by default):
# - PostgreSQL 14 + pgvector (vector database)
# - Redis (task queue)
# - Electric SQL (real-time sync)
# - Docling (document processing, CPU-only, OCR disabled)
# - Kokoro TTS (local text-to-speech for podcasts)
# - Faster-Whisper (local speech-to-text for audio files)
# - Playwright Chromium (web scraping)
#
# Note: This is the CPU-only version. A :cuda tagged image with GPU support
# will be available in the future for faster AI inference.

# ====================
# Stage 1: Get Electric SQL Binary
# ====================
# NOTE(review): ':latest' is not reproducible — consider pinning a version tag
# (or digest) for this and the node base image below.
FROM electricsql/electric:latest AS electric-builder

# ====================
# Stage 2: Build Frontend
# ====================
FROM node:20-alpine AS frontend-builder
WORKDIR /app
# Install pnpm
RUN corepack enable pnpm
# Copy package files first so the dependency layer stays cached until they change
COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./
COPY surfsense_web/source.config.ts ./
COPY surfsense_web/content ./content
# Install dependencies (skip postinstall which requires all source files)
RUN pnpm install --frozen-lockfile --ignore-scripts
# Copy source
COPY surfsense_web/ ./
# Run fumadocs-mdx postinstall now that source files are available
RUN pnpm fumadocs-mdx
# Build with placeholder values that will be replaced at runtime
# These unique strings allow runtime substitution via entrypoint script
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__
ENV NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__
ENV NEXT_PUBLIC_ELECTRIC_URL=__NEXT_PUBLIC_ELECTRIC_URL__
ENV NEXT_PUBLIC_ELECTRIC_AUTH_MODE=__NEXT_PUBLIC_ELECTRIC_AUTH_MODE__
ENV NEXT_PUBLIC_DEPLOYMENT_MODE=__NEXT_PUBLIC_DEPLOYMENT_MODE__
# Build
RUN pnpm run build

# ====================
# Stage 3: Runtime Image
# ====================
FROM ubuntu:22.04 AS runtime
# Prevent interactive prompts during apt operations in THIS build stage only.
# ARG (not ENV) so the setting does not leak into the runtime environment of
# containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
# PostgreSQL
postgresql-14 \
postgresql-contrib-14 \
# Build tools for pgvector
build-essential \
postgresql-server-dev-14 \
git \
# Redis
redis-server \
# Node.js prerequisites
curl \
ca-certificates \
gnupg \
# Backend dependencies
gcc \
wget \
unzip \
dos2unix \
# For PPAs
software-properties-common \
# ============================
# Local TTS (Kokoro) dependencies
# ============================
espeak-ng \
libespeak-ng1 \
# ============================
# Local STT (Faster-Whisper) dependencies
# ============================
ffmpeg \
# ============================
# Audio processing (soundfile)
# ============================
libsndfile1 \
# ============================
# Image/OpenCV dependencies (for Docling)
# ============================
libgl1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
# ============================
# Playwright browser dependencies
# ============================
libnspr4 \
libnss3 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libxkbcommon0 \
libatspi2.0-0 \
libxcomposite1 \
libxdamage1 \
libxrandr2 \
libgbm1 \
libcairo2 \
libpango-1.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
RUN ARCH=$(dpkg --print-architecture) && \
wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb
# Install Node.js 20.x (for running frontend)
# The NodeSource setup script runs its own 'apt-get update' for the new repo.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/*
# Install Python 3.12 from deadsnakes PPA
RUN add-apt-repository ppa:deadsnakes/ppa -y \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.12 \
python3.12-venv \
python3.12-dev \
&& rm -rf /var/lib/apt/lists/*
# Set Python 3.12 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
# Install pip for Python 3.12
RUN python3.12 -m ensurepip --upgrade \
&& python3.12 -m pip install --upgrade pip
# Install supervisor via pip (system package incompatible with Python 3.12)
RUN pip install --no-cache-dir supervisor
# Build and install pgvector (pinned to v0.7.4 for reproducibility)
RUN cd /tmp \
&& git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \
&& cd pgvector \
&& make \
&& make install \
&& rm -rf /tmp/pgvector
# Update certificates
RUN update-ca-certificates
# Create data directories (populated at runtime by the entrypoint; /data is
# declared as a VOLUME further below, after all build-time writes)
RUN mkdir -p /data/postgres /data/redis /data/surfsense \
&& chown -R postgres:postgres /data/postgres

# ====================
# Copy Frontend Build
# ====================
WORKDIR /app/frontend
# Copy only the standalone build (not node_modules)
COPY --from=frontend-builder /app/.next/standalone ./
COPY --from=frontend-builder /app/.next/static ./.next/static
COPY --from=frontend-builder /app/public ./public
COPY surfsense_web/content/docs /app/surfsense_web/content/docs

# ====================
# Copy Electric SQL Release
# ====================
COPY --from=electric-builder /app /app/electric-release

# ====================
# Setup Backend
# ====================
WORKDIR /app/backend
# Copy backend dependency files
COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./
# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed)
RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
# Install python dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv \
&& uv pip install --system --no-cache-dir -e .
# Set SSL environment variables
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
&& echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \
&& echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh
# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py
# GPU support will be added in a future :cuda tagged image
# Install Playwright browsers
RUN pip install --no-cache-dir playwright \
&& playwright install chromium \
&& rm -rf /root/.cache/ms-playwright/ffmpeg*
# Copy backend source
COPY surfsense_backend/ ./

# ====================
# Configuration
# ====================
WORKDIR /app
# Copy supervisor configuration
COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf
# Copy entrypoint script (dos2unix guards against CRLF checkouts on Windows)
COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh
# PostgreSQL initialization script
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh
# Clean up build dependencies to reduce image size
RUN apt-get purge -y build-essential postgresql-server-dev-14 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Environment variables with defaults
ENV POSTGRES_USER=surfsense
ENV POSTGRES_PASSWORD=surfsense
ENV POSTGRES_DB=surfsense
ENV DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense
ENV CELERY_BROKER_URL=redis://localhost:6379/0
ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0
ENV CELERY_TASK_DEFAULT_QUEUE=surfsense
ENV PYTHONPATH=/app/backend
ENV NEXT_FRONTEND_URL=http://localhost:3000
ENV AUTH_TYPE=LOCAL
ENV ETL_SERVICE=DOCLING
ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Frontend configuration (can be overridden at runtime)
# These are injected into the Next.js build at container startup
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
ENV NEXT_PUBLIC_ETL_SERVICE=DOCLING
# Electric SQL configuration (ELECTRIC_DATABASE_URL is built dynamically by entrypoint from these values)
ENV ELECTRIC_DB_USER=electric
ENV ELECTRIC_DB_PASSWORD=electric_password
# Note: ELECTRIC_DATABASE_URL is NOT set here - entrypoint builds it dynamically from ELECTRIC_DB_USER/PASSWORD
ENV ELECTRIC_INSECURE=true
ENV ELECTRIC_WRITE_TO_PG_MODE=direct
ENV ELECTRIC_PORT=5133
ENV PORT=5133
ENV NEXT_PUBLIC_ELECTRIC_URL=http://localhost:5133
ENV NEXT_PUBLIC_ELECTRIC_AUTH_MODE=insecure
# Data volume
VOLUME ["/data"]
# Expose ports (Frontend: 3000, Backend: 8000, Electric: 5133)
EXPOSE 3000 8000 5133
# Health check (start-period is generous because first boot runs initdb + migrations)
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD curl -f http://localhost:3000 || exit 1
# Run entrypoint (exec form; the script execs supervisord so it becomes PID 1)
CMD ["/app/entrypoint.sh"]

View file

@ -1,80 +0,0 @@
# SurfSense Quick Start Docker Compose
#
# This is a simplified docker-compose for quick local deployment using pre-built images.
# For production or customized deployments, use the main docker-compose.yml
#
# Usage:
# 1. (Optional) Create a .env file with your configuration
# 2. Run: docker compose -f docker-compose.quickstart.yml up -d
# 3. Access SurfSense at http://localhost:3000
#
# All Environment Variables are Optional:
# - SECRET_KEY: JWT secret key (auto-generated and persisted if not set)
# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2)
# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING)
# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro)
# - STT_SERVICE: Speech-to-text service with model size (default: local/base)
# - FIRECRAWL_API_KEY: For web crawling features

# 'version' is obsolete and ignored by Compose v2; kept for older tooling.
version: "3.8"

services:
  # All-in-one SurfSense container
  surfsense:
    image: ghcr.io/modsetter/surfsense:latest
    container_name: surfsense
    ports:
      - "${FRONTEND_PORT:-3000}:3000"
      - "${BACKEND_PORT:-8000}:8000"
      # Electric SQL sync endpoint — the browser connects to it directly
      # (frontend default is http://localhost:5133), so it must be published.
      - "${ELECTRIC_PORT:-5133}:5133"
    volumes:
      - surfsense-data:/data
    environment:
      # Authentication (auto-generated if not set)
      - SECRET_KEY=${SECRET_KEY:-}
      # Auth Configuration
      - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
      # AI/ML Configuration
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
      - RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE}
      - RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-}
      - RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-}
      # Document Processing
      - ETL_SERVICE=${ETL_SERVICE:-DOCLING}
      - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
      - LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-}
      # Audio Services
      - TTS_SERVICE=${TTS_SERVICE:-local/kokoro}
      - TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-}
      - STT_SERVICE=${STT_SERVICE:-local/base}
      - STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-}
      # Web Crawling
      - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-}
      # Optional Features
      - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
      - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m}
      # LangSmith Observability (optional)
      - LANGSMITH_TRACING=${LANGSMITH_TRACING:-false}
      - LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-}
      - LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-}
      - LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-}
    restart: unless-stopped
    healthcheck:
      # CMD-SHELL is required here: in the exec form ["CMD", ...] the "&&"
      # would be passed to curl as a literal argument, never as a shell
      # operator, so the second probe would silently never run.
      test: ["CMD-SHELL", "curl -f http://localhost:3000 && curl -f http://localhost:8000/docs"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

volumes:
  surfsense-data:
    name: surfsense-data

View file

@ -1,165 +0,0 @@
services:
  db:
    image: ankane/pgvector:latest
    ports:
      - "${POSTGRES_PORT:-5432}:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./scripts/docker/postgresql.conf:/etc/postgresql/postgresql.conf:ro
      # Runs on first cluster initialization only (docker-entrypoint-initdb.d)
      - ./scripts/docker/init-electric-user.sh:/docker-entrypoint-initdb.d/init-electric-user.sh:ro
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
      - POSTGRES_DB=${POSTGRES_DB:-surfsense}
      - ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
      - ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
    command: postgres -c config_file=/etc/postgresql/postgresql.conf

  pgadmin:
    image: dpage/pgadmin4
    ports:
      - "${PGADMIN_PORT:-5050}:80"
    environment:
      - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL:-admin@surfsense.com}
      - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD:-surfsense}
    volumes:
      - pgadmin_data:/var/lib/pgadmin
    depends_on:
      - db

  redis:
    image: redis:7-alpine
    ports:
      # REDIS_PORT only changes the HOST-side mapping; inside the Compose
      # network Redis always listens on 6379.
      - "${REDIS_PORT:-6379}:6379"
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes

  backend:
    build: ./surfsense_backend
    # image: ghcr.io/modsetter/surfsense_backend:latest
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      - ./surfsense_backend/app:/app/app
      - shared_temp:/tmp
      # Uncomment and edit the line below to enable Obsidian vault indexing
      # - /path/to/your/obsidian/vault:/obsidian-vault:ro
    env_file:
      - ./surfsense_backend/.env
    environment:
      - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense}
      # Container port 6379 is hardcoded on purpose: these URLs resolve the
      # 'redis' service name inside the network, where the host-side
      # REDIS_PORT override does not apply.
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
      - REDIS_APP_URL=redis://redis:6379/0
      # Queue name isolation - prevents task collision if Redis is shared with other apps
      - CELERY_TASK_DEFAULT_QUEUE=surfsense
      - PYTHONPATH=/app
      - UVICORN_LOOP=asyncio
      - UNSTRUCTURED_HAS_PATCHED_LOOP=1
      - LANGCHAIN_TRACING_V2=false
      - LANGSMITH_TRACING=false
      - ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
      - ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
      - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
      - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
    depends_on:
      - db
      - redis

  # Run these services separately in production
  # celery_worker:
  #   build: ./surfsense_backend
  #   # image: ghcr.io/modsetter/surfsense_backend:latest
  #   command: celery -A app.celery_app worker --loglevel=info --concurrency=1 --pool=solo
  #   volumes:
  #     - ./surfsense_backend:/app
  #     - shared_temp:/tmp
  #   env_file:
  #     - ./surfsense_backend/.env
  #   environment:
  #     - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense}
  #     - CELERY_BROKER_URL=redis://redis:6379/0
  #     - CELERY_RESULT_BACKEND=redis://redis:6379/0
  #     - PYTHONPATH=/app
  #   depends_on:
  #     - db
  #     - redis
  #     - backend
  # celery_beat:
  #   build: ./surfsense_backend
  #   # image: ghcr.io/modsetter/surfsense_backend:latest
  #   command: celery -A app.celery_app beat --loglevel=info
  #   volumes:
  #     - ./surfsense_backend:/app
  #     - shared_temp:/tmp
  #   env_file:
  #     - ./surfsense_backend/.env
  #   environment:
  #     - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense}
  #     - CELERY_BROKER_URL=redis://redis:6379/0
  #     - CELERY_RESULT_BACKEND=redis://redis:6379/0
  #     - PYTHONPATH=/app
  #   depends_on:
  #     - db
  #     - redis
  #     - celery_worker
  # flower:
  #   build: ./surfsense_backend
  #   # image: ghcr.io/modsetter/surfsense_backend:latest
  #   command: celery -A app.celery_app flower --port=5555
  #   ports:
  #     - "${FLOWER_PORT:-5555}:5555"
  #   env_file:
  #     - ./surfsense_backend/.env
  #   environment:
  #     - CELERY_BROKER_URL=redis://redis:6379/0
  #     - CELERY_RESULT_BACKEND=redis://redis:6379/0
  #     - PYTHONPATH=/app
  #   depends_on:
  #     - redis
  #     - celery_worker

  electric:
    image: electricsql/electric:latest
    ports:
      - "${ELECTRIC_PORT:-5133}:3000"
    environment:
      # NOTE(review): ${POSTGRES_PORT} here is the HOST mapping; it is only
      # correct when POSTGRES_HOST is overridden to point outside the Compose
      # network (the default host 'db' always listens on 5432). Override
      # ELECTRIC_DATABASE_URL entirely for non-default setups.
      - DATABASE_URL=${ELECTRIC_DATABASE_URL:-postgresql://${ELECTRIC_DB_USER:-electric}:${ELECTRIC_DB_PASSWORD:-electric_password}@${POSTGRES_HOST:-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-surfsense}?sslmode=disable}
      - ELECTRIC_INSECURE=true
      - ELECTRIC_WRITE_TO_PG_MODE=direct
    restart: unless-stopped
    # depends_on:
    #   - db
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/v1/health"]
      interval: 10s
      timeout: 5s
      retries: 5

  frontend:
    build:
      context: ./surfsense_web
      # image: ghcr.io/modsetter/surfsense_ui:latest
      args:
        NEXT_PUBLIC_FASTAPI_BACKEND_URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:8000}
        NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE: ${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE:-LOCAL}
        NEXT_PUBLIC_ETL_SERVICE: ${NEXT_PUBLIC_ETL_SERVICE:-DOCLING}
    ports:
      - "${FRONTEND_PORT:-3000}:3000"
    env_file:
      - ./surfsense_web/.env
    environment:
      - NEXT_PUBLIC_ELECTRIC_URL=${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:5133}
      - NEXT_PUBLIC_ELECTRIC_AUTH_MODE=insecure
    depends_on:
      - backend
      - electric

volumes:
  postgres_data:
  pgadmin_data:
  redis_data:
  shared_temp:

View file

@ -1,243 +0,0 @@
#!/bin/bash
# entrypoint-allinone.sh — container entrypoint for the all-in-one image.
# Prepares persistent state under /data, derives runtime configuration, runs
# first-boot initialization and migrations, then execs supervisord.
set -e
echo "==========================================="
echo " 🏄 SurfSense All-in-One Container"
echo "==========================================="
# Create log directory
mkdir -p /var/log/supervisor
# ================================================
# Ensure data directory exists
# ================================================
mkdir -p /data
# ================================================
# Generate SECRET_KEY if not provided
# ================================================
# Precedence: explicit $SECRET_KEY env var > previously persisted
# /data/.secret_key > freshly generated key. Persisting the generated key
# keeps issued tokens valid across container restarts.
if [ -z "$SECRET_KEY" ]; then
# Generate a random secret key and persist it
if [ -f /data/.secret_key ]; then
export SECRET_KEY=$(cat /data/.secret_key)
echo "✅ Using existing SECRET_KEY from persistent storage"
else
export SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_urlsafe(32))")
echo "$SECRET_KEY" > /data/.secret_key
# Owner-only permissions: this file holds the signing secret
chmod 600 /data/.secret_key
echo "✅ Generated new SECRET_KEY (saved for persistence)"
fi
fi
# ================================================
# Set default TTS/STT services if not provided
# ================================================
if [ -z "$TTS_SERVICE" ]; then
export TTS_SERVICE="local/kokoro"
echo "✅ Using default TTS_SERVICE: local/kokoro"
fi
if [ -z "$STT_SERVICE" ]; then
export STT_SERVICE="local/base"
echo "✅ Using default STT_SERVICE: local/base"
fi
# ================================================
# Set Electric SQL configuration
# ================================================
export ELECTRIC_DB_USER="${ELECTRIC_DB_USER:-electric}"
export ELECTRIC_DB_PASSWORD="${ELECTRIC_DB_PASSWORD:-electric_password}"
if [ -z "$ELECTRIC_DATABASE_URL" ]; then
# Built here (not in the Dockerfile) so user/password overrides take effect
export ELECTRIC_DATABASE_URL="postgresql://${ELECTRIC_DB_USER}:${ELECTRIC_DB_PASSWORD}@localhost:5432/${POSTGRES_DB:-surfsense}?sslmode=disable"
echo "✅ Electric SQL URL configured dynamically"
else
# Ensure sslmode=disable is in the URL if not already present
if [[ "$ELECTRIC_DATABASE_URL" != *"sslmode="* ]]; then
# Add sslmode=disable (handle both cases: with or without existing query params)
if [[ "$ELECTRIC_DATABASE_URL" == *"?"* ]]; then
export ELECTRIC_DATABASE_URL="${ELECTRIC_DATABASE_URL}&sslmode=disable"
else
export ELECTRIC_DATABASE_URL="${ELECTRIC_DATABASE_URL}?sslmode=disable"
fi
fi
echo "✅ Electric SQL URL configured from environment"
fi
# Set Electric SQL port
export ELECTRIC_PORT="${ELECTRIC_PORT:-5133}"
# Mirrored into the generic PORT variable, which the supervisor config passes
# to the Electric program (see supervisor-allinone.conf)
export PORT="${ELECTRIC_PORT}"
# ================================================
# Initialize PostgreSQL if needed
# ================================================
# First boot only: PG_VERSION marks an initialized cluster, so a pre-existing
# /data volume skips this entire section.
if [ ! -f /data/postgres/PG_VERSION ]; then
echo "📦 Initializing PostgreSQL database..."
# Initialize PostgreSQL data directory
chown -R postgres:postgres /data/postgres
chmod 700 /data/postgres
# Initialize with UTF8 encoding (required for proper text handling)
su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres --encoding=UTF8 --locale=C.UTF-8"
# Configure PostgreSQL for connections
echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf
echo "local all all trust" >> /data/postgres/pg_hba.conf
echo "listen_addresses='*'" >> /data/postgres/postgresql.conf
# Enable logical replication for Electric SQL
echo "wal_level = logical" >> /data/postgres/postgresql.conf
echo "max_replication_slots = 10" >> /data/postgres/postgresql.conf
echo "max_wal_senders = 10" >> /data/postgres/postgresql.conf
# Start PostgreSQL temporarily to create database and user
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_init.log start"
# Wait for PostgreSQL to be ready
# NOTE(review): fixed sleep can race on very slow hosts; a pg_isready retry
# loop would be more robust — confirm before changing.
sleep 5
# Create user and database
su - postgres -c "psql -c \"CREATE USER ${POSTGRES_USER:-surfsense} WITH PASSWORD '${POSTGRES_PASSWORD:-surfsense}' SUPERUSER;\""
su - postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB:-surfsense} OWNER ${POSTGRES_USER:-surfsense};\""
# Enable pgvector extension
su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} -c 'CREATE EXTENSION IF NOT EXISTS vector;'"
# Create Electric SQL replication user (idempotent - uses IF NOT EXISTS)
echo "📡 Creating Electric SQL replication user..."
# The \\\$\\\$ sequences go through two rounds of double-quote processing
# (this shell, then the shell started by `su -c`) so psql ultimately sees
# plain $$ dollar-quoting. Do not "simplify" the escaping.
su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} <<-EOSQL
DO \\\$\\\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '${ELECTRIC_DB_USER}') THEN
CREATE USER ${ELECTRIC_DB_USER} WITH REPLICATION PASSWORD '${ELECTRIC_DB_PASSWORD}';
END IF;
END
\\\$\\\$;
GRANT CONNECT ON DATABASE ${POSTGRES_DB:-surfsense} TO ${ELECTRIC_DB_USER};
GRANT USAGE ON SCHEMA public TO ${ELECTRIC_DB_USER};
GRANT SELECT ON ALL TABLES IN SCHEMA public TO ${ELECTRIC_DB_USER};
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO ${ELECTRIC_DB_USER};
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO ${ELECTRIC_DB_USER};
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO ${ELECTRIC_DB_USER};
-- Create the publication for Electric SQL (if not exists)
DO \\\$\\\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_publication WHERE pubname = 'electric_publication_default') THEN
CREATE PUBLICATION electric_publication_default;
END IF;
END
\\\$\\\$;
EOSQL"
echo "✅ Electric SQL user '${ELECTRIC_DB_USER}' created"
# Stop temporary PostgreSQL
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
echo "✅ PostgreSQL initialized successfully"
else
echo "✅ PostgreSQL data directory already exists"
fi
# ================================================
# Initialize Redis data directory
# ================================================
mkdir -p /data/redis
chmod 755 /data/redis
echo "✅ Redis data directory ready"
# ================================================
# Copy frontend build to runtime location
# ================================================
# The Dockerfile already places the standalone server at /app/frontend and its
# static assets at /app/frontend/.next/static, so this branch only runs for an
# image variant that ships the raw standalone directory instead.
# (A former second line here copied /app/frontend/.next/static onto itself;
# cp refuses to copy a directory into itself and the error was silenced by
# '2>/dev/null || true', making it a dead statement — removed.)
if [ -d /app/frontend/.next/standalone ]; then
cp -r /app/frontend/.next/standalone/* /app/frontend/ 2>/dev/null || true
fi
# ================================================
# Runtime Environment Variable Replacement
# ================================================
# Next.js NEXT_PUBLIC_* vars are baked in at build time.
# This replaces placeholder values with actual runtime env vars.
# The image was built with the __NEXT_PUBLIC_*__ sentinel strings precisely so
# they can be rewritten here on every container start.
echo "🔧 Applying runtime environment configuration..."
# Set defaults if not provided
NEXT_PUBLIC_FASTAPI_BACKEND_URL="${NEXT_PUBLIC_FASTAPI_BACKEND_URL:-http://localhost:8000}"
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE="${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE:-LOCAL}"
NEXT_PUBLIC_ETL_SERVICE="${NEXT_PUBLIC_ETL_SERVICE:-DOCLING}"
NEXT_PUBLIC_ELECTRIC_URL="${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:5133}"
NEXT_PUBLIC_ELECTRIC_AUTH_MODE="${NEXT_PUBLIC_ELECTRIC_AUTH_MODE:-insecure}"
NEXT_PUBLIC_DEPLOYMENT_MODE="${NEXT_PUBLIC_DEPLOYMENT_MODE:-self-hosted}"
# Replace placeholders in all JS files
# NOTE(review): sed uses '|' as its delimiter, so a value containing '|'
# would break the substitution — unlikely for URLs, but worth knowing.
find /app/frontend -type f \( -name "*.js" -o -name "*.json" \) -exec sed -i \
-e "s|__NEXT_PUBLIC_FASTAPI_BACKEND_URL__|${NEXT_PUBLIC_FASTAPI_BACKEND_URL}|g" \
-e "s|__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__|${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE}|g" \
-e "s|__NEXT_PUBLIC_ETL_SERVICE__|${NEXT_PUBLIC_ETL_SERVICE}|g" \
-e "s|__NEXT_PUBLIC_ELECTRIC_URL__|${NEXT_PUBLIC_ELECTRIC_URL}|g" \
-e "s|__NEXT_PUBLIC_ELECTRIC_AUTH_MODE__|${NEXT_PUBLIC_ELECTRIC_AUTH_MODE}|g" \
-e "s|__NEXT_PUBLIC_DEPLOYMENT_MODE__|${NEXT_PUBLIC_DEPLOYMENT_MODE}|g" \
{} +
echo "✅ Environment configuration applied"
echo " Backend URL: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL}"
echo " Auth Type: ${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE}"
echo " ETL Service: ${NEXT_PUBLIC_ETL_SERVICE}"
echo " Electric URL: ${NEXT_PUBLIC_ELECTRIC_URL}"
echo " Deployment Mode: ${NEXT_PUBLIC_DEPLOYMENT_MODE}"
# ================================================
# Run database migrations
# ================================================
# Spins up PostgreSQL and Redis just long enough to apply Alembic migrations,
# then stops them again; the long-running instances are owned by supervisord.
run_migrations() {
echo "🔄 Running database migrations..."
# Start PostgreSQL temporarily for migrations
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_migrate.log start"
sleep 5
# Start Redis temporarily for migrations (some might need it)
redis-server --dir /data/redis --daemonize yes
sleep 2
# Run alembic migrations
cd /app/backend
# '|| echo' keeps startup alive under 'set -e' even if alembic exits
# non-zero; a genuine migration failure will surface when the backend runs.
alembic upgrade head || echo "⚠️ Migrations may have already been applied"
# Stop temporary services
redis-cli shutdown || true
su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop"
echo "✅ Database migrations complete"
}
# Always run migrations on startup - alembic upgrade head is safe to run
# every time. It only applies pending migrations (never re-runs applied ones,
# never calls downgrade). This ensures updates are applied automatically.
run_migrations
# ================================================
# Environment Variables Info
# ================================================
# Informational banner only — no state is changed below this point.
echo ""
echo "==========================================="
echo " 📋 Configuration"
echo "==========================================="
echo " Frontend URL: http://localhost:3000"
echo " Backend API: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL}"
echo " API Docs: ${NEXT_PUBLIC_FASTAPI_BACKEND_URL}/docs"
echo " Electric URL: ${NEXT_PUBLIC_ELECTRIC_URL:-http://localhost:5133}"
echo " Auth Type: ${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE}"
echo " ETL Service: ${NEXT_PUBLIC_ETL_SERVICE}"
echo " TTS Service: ${TTS_SERVICE}"
echo " STT Service: ${STT_SERVICE}"
echo "==========================================="
echo ""
# ================================================
# Start Supervisor (manages all services)
# ================================================
echo "🚀 Starting all services..."
# exec replaces this shell so supervisord becomes PID 1 and receives the
# container's stop signals directly.
exec /usr/local/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf

View file

@ -1,56 +0,0 @@
#!/bin/sh
# ============================================================================
# Electric SQL User Initialization Script (docker-compose only)
# ============================================================================
# This script is ONLY used when running via docker-compose.
#
# How it works:
# - docker-compose.yml mounts this script into the PostgreSQL container's
# /docker-entrypoint-initdb.d/ directory
# - PostgreSQL automatically executes scripts in that directory on first
# container initialization
#
# For local PostgreSQL users (non-Docker), this script is NOT used.
# Instead, the Electric user is created by Alembic migration 66
# (66_add_notifications_table_and_electric_replication.py).
#
# Both approaches are idempotent (use IF NOT EXISTS), so running both
# will not cause conflicts.
# ============================================================================
set -e
# Use environment variables with defaults
ELECTRIC_DB_USER="${ELECTRIC_DB_USER:-electric}"
ELECTRIC_DB_PASSWORD="${ELECTRIC_DB_PASSWORD:-electric_password}"
echo "Creating Electric SQL replication user: $ELECTRIC_DB_USER"
# POSTGRES_USER/POSTGRES_DB are provided by the official postgres image
# entrypoint. The heredoc delimiter is unquoted so $VARS expand; \$\$ escapes
# the dollar signs once so psql receives literal $$ dollar-quoting.
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
DO \$\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_user WHERE usename = '$ELECTRIC_DB_USER') THEN
CREATE USER $ELECTRIC_DB_USER WITH REPLICATION PASSWORD '$ELECTRIC_DB_PASSWORD';
END IF;
END
\$\$;
GRANT CONNECT ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
GRANT CREATE ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
GRANT USAGE ON SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO $ELECTRIC_DB_USER;
-- Create the publication for Electric SQL (if not exists)
DO \$\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_publication WHERE pubname = 'electric_publication_default') THEN
CREATE PUBLICATION electric_publication_default;
END IF;
END
\$\$;
EOSQL
echo "Electric SQL user '$ELECTRIC_DB_USER' and publication created successfully"

View file

@ -1,77 +0,0 @@
#!/bin/bash
# PostgreSQL initialization script for SurfSense
# This script is called during container startup if the database needs initialization
# Creates the cluster under $PGDATA, enables logical replication for Electric
# SQL, then creates the app superuser, database, pgvector extension, and the
# read-only Electric replication user.
set -e
# All credentials/paths are overridable via environment; defaults below.
PGDATA=${PGDATA:-/data/postgres}
POSTGRES_USER=${POSTGRES_USER:-surfsense}
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-surfsense}
POSTGRES_DB=${POSTGRES_DB:-surfsense}
# Electric SQL user credentials (configurable)
ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
echo "Initializing PostgreSQL..."
# Check if PostgreSQL is already initialized
if [ -f "$PGDATA/PG_VERSION" ]; then
echo "PostgreSQL data directory already exists. Skipping initialization."
exit 0
fi
# Initialize the database cluster
/usr/lib/postgresql/14/bin/initdb -D "$PGDATA" --username=postgres
# Configure PostgreSQL
cat >> "$PGDATA/postgresql.conf" << EOF
listen_addresses = '*'
max_connections = 200
shared_buffers = 256MB
# Enable logical replication (required for Electric SQL)
wal_level = logical
max_replication_slots = 10
max_wal_senders = 10
# Performance settings
checkpoint_timeout = 10min
max_wal_size = 1GB
min_wal_size = 80MB
EOF
cat >> "$PGDATA/pg_hba.conf" << EOF
# Allow connections from anywhere with password
host all all 0.0.0.0/0 md5
host all all ::0/0 md5
EOF
# Start PostgreSQL temporarily
/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" -l /tmp/postgres_init.log start
# Wait for PostgreSQL to start
sleep 3
# Create user and database
# NOTE(review): unlike entrypoint-allinone.sh and init-electric-user.sh, this
# script does NOT create the 'electric_publication_default' publication —
# presumably the Alembic migration handles it; confirm before relying on this
# path alone.
psql -U postgres << EOF
CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER;
CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER;
\c $POSTGRES_DB
CREATE EXTENSION IF NOT EXISTS vector;
-- Create Electric SQL replication user
CREATE USER $ELECTRIC_DB_USER WITH REPLICATION PASSWORD '$ELECTRIC_DB_PASSWORD';
GRANT CONNECT ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
GRANT USAGE ON SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO $ELECTRIC_DB_USER;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO $ELECTRIC_DB_USER;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO $ELECTRIC_DB_USER;
EOF
echo "PostgreSQL initialized successfully."
# Stop PostgreSQL (supervisor will start it)
/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" stop

View file

@ -1,20 +0,0 @@
# PostgreSQL configuration for Electric SQL
# This file is mounted into the PostgreSQL container
# (docker-compose.yml mounts it read-only and starts postgres with
# -c config_file pointing here, replacing the image's default config).
listen_addresses = '*'
max_connections = 200
shared_buffers = 256MB
# Enable logical replication (required for Electric SQL)
wal_level = logical
max_replication_slots = 10
max_wal_senders = 10
# Performance settings
checkpoint_timeout = 10min
max_wal_size = 1GB
min_wal_size = 80MB
# Logging (optional, for debugging)
# log_statement = 'all'
# log_replication_commands = on

View file

@ -1,121 +0,0 @@
# Supervisor configuration for the all-in-one SurfSense image.
# Started by entrypoint-allinone.sh via `exec supervisord -c <this file>`.
# All programs log to the container's stdout/stderr (logfile_maxbytes=0
# disables rotation, required when logging to /dev/stdout).
[supervisord]
nodaemon=true
logfile=/dev/stdout
logfile_maxbytes=0
pidfile=/var/run/supervisord.pid
loglevel=info
user=root
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
# Startup order is driven by priority (lower starts first):
# postgresql(10) -> redis(20) -> electric(25) -> backend(30)
# -> celery-worker(40) -> celery-beat(50) -> frontend(60)
# PostgreSQL
[program:postgresql]
command=/usr/lib/postgresql/14/bin/postgres -D /data/postgres
user=postgres
autostart=true
autorestart=true
priority=10
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=PGDATA="/data/postgres"
# Redis
[program:redis]
command=/usr/bin/redis-server --dir /data/redis --appendonly yes
autostart=true
autorestart=true
priority=20
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
# Backend API
[program:backend]
command=python main.py
directory=/app/backend
autostart=true
autorestart=true
priority=30
startsecs=10
startretries=3
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1"
# Celery Worker
[program:celery-worker]
command=celery -A app.celery_app worker --loglevel=info --concurrency=2 --pool=solo --queues=surfsense,surfsense.connectors
directory=/app/backend
autostart=true
autorestart=true
priority=40
startsecs=15
startretries=3
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=PYTHONPATH="/app/backend"
# Celery Beat (scheduler)
[program:celery-beat]
command=celery -A app.celery_app beat --loglevel=info
directory=/app/backend
autostart=true
autorestart=true
priority=50
startsecs=20
startretries=3
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=PYTHONPATH="/app/backend"
# Electric SQL (real-time sync)
# NOTE(review): %(ENV_X)s expansion makes supervisord fail at startup if any
# referenced variable is unset; the entrypoint exports ELECTRIC_DATABASE_URL,
# ELECTRIC_INSECURE, ELECTRIC_WRITE_TO_PG_MODE, and ELECTRIC_PORT before exec.
[program:electric]
command=/app/electric-release/bin/entrypoint start
autostart=true
autorestart=true
priority=25
startsecs=10
startretries=3
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=DATABASE_URL="%(ENV_ELECTRIC_DATABASE_URL)s",ELECTRIC_INSECURE="%(ENV_ELECTRIC_INSECURE)s",ELECTRIC_WRITE_TO_PG_MODE="%(ENV_ELECTRIC_WRITE_TO_PG_MODE)s",RELEASE_COOKIE="surfsense_electric_cookie",PORT="%(ENV_ELECTRIC_PORT)s"
# Frontend
[program:frontend]
command=node server.js
directory=/app/frontend
autostart=true
autorestart=true
priority=60
startsecs=5
startretries=3
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0"
# Process Groups
[group:surfsense]
programs=postgresql,redis,electric,backend,celery-worker,celery-beat,frontend
priority=999