diff --git a/Dockerfile.allinone b/Dockerfile.allinone index c7a2505f6..0765deb15 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -69,8 +69,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ ca-certificates \ gnupg \ - # Supervisor - supervisor \ # Backend dependencies gcc \ wget \ @@ -139,6 +137,9 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ RUN python3.12 -m ensurepip --upgrade \ && python3.12 -m pip install --upgrade pip +# Install supervisor via pip (system package incompatible with Python 3.12) +RUN pip install --no-cache-dir supervisor + # Build and install pgvector RUN cd /tmp \ && git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \ diff --git a/README.md b/README.md index b717cacbe..249ca0f0b 100644 --- a/README.md +++ b/README.md @@ -155,23 +155,31 @@ Check out our public roadmap and contribute your ideas or feedback: > [!TIP] > For production deployments, use the full [Docker Compose setup](https://www.surfsense.net/docs/docker-installation) which offers more control and scalability. -**Quick Start :** +**Linux/macOS:** ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ --name surfsense \ --restart unless-stopped \ ghcr.io/modsetter/surfsense:latest ``` -**With Custom Embedding Model (e.g., OpenAI):** +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +**With Custom Configuration (e.g., OpenAI Embeddings):** ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ -e OPENAI_API_KEY=your_openai_api_key \ --name surfsense \ @@ -179,24 +187,20 @@ docker run -d -p 3000:3000 -p 8000:8000 \ ghcr.io/modsetter/surfsense:latest ``` -**Using Docker Compose (Recommended for easier management):** - -```bash -# Download the quick start compose file -curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml - -# Create .env file with your secret key -echo "SECRET_KEY=$(openssl rand -hex 32)" > .env - -# Start SurfSense -docker compose up -d -``` - After starting, access SurfSense at: - **Frontend**: [http://localhost:3000](http://localhost:3000) - **Backend API**: [http://localhost:8000](http://localhost:8000) - **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) +**Useful Commands:** + +```bash +docker logs -f surfsense # View logs +docker stop surfsense # Stop +docker start surfsense # Start +docker rm surfsense # Remove (data preserved in volume) +``` + ### Installation Options SurfSense provides multiple options to get started: diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml index 012388335..ff72618b7 100644 --- a/docker-compose.quickstart.yml +++ b/docker-compose.quickstart.yml @@ -4,18 +4,16 @@ # For production or customized deployments, use the main docker-compose.yml # # Usage: -# 1. Create a .env file with your required configuration (see below) +# 1. (Optional) Create a .env file with your configuration # 2. Run: docker compose -f docker-compose.quickstart.yml up -d # 3. Access SurfSense at http://localhost:3000 # -# Required Environment Variables: -# - SECRET_KEY: JWT secret key (generate with: openssl rand -hex 32) -# -# Optional Environment Variables: +# All Environment Variables are Optional: +# - SECRET_KEY: JWT secret key (auto-generated and persisted if not set) # - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2) # - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING) # - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro) -# - STT_SERVICE: Speech-to-text service (default: local/base) +# - STT_SERVICE: Speech-to-text service with model size (default: local/base) # - FIRECRAWL_API_KEY: For web crawling features version: "3.8" @@ -31,8 +29,8 @@ services: volumes: - surfsense-data:/data environment: - # Required - - SECRET_KEY=${SECRET_KEY:-change-me-in-production} + # Authentication (auto-generated if not set) + - SECRET_KEY=${SECRET_KEY:-} # Auth Configuration - AUTH_TYPE=${AUTH_TYPE:-LOCAL} diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh index 0df2555c2..427256f6d 100644 --- a/scripts/docker/entrypoint-allinone.sh +++ b/scripts/docker/entrypoint-allinone.sh @@ -8,6 +8,40 @@ echo "===========================================" # Create log directory mkdir -p /var/log/supervisor +# ================================================ +# Ensure data directory exists +# ================================================ +mkdir -p /data + +# ================================================ +# Generate SECRET_KEY if not provided +# ================================================ +if [ -z "$SECRET_KEY" ]; then + # Generate a random secret key and persist it + if [ -f /data/.secret_key ]; then + export SECRET_KEY=$(cat /data/.secret_key) + echo "✅ Using existing SECRET_KEY from persistent storage" + else + export SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_urlsafe(32))") + echo "$SECRET_KEY" > /data/.secret_key + chmod 600 /data/.secret_key + echo "✅ Generated new SECRET_KEY (saved for persistence)" + fi +fi + +# ================================================ +# Set default TTS/STT services if not provided +# ================================================ +if [ -z "$TTS_SERVICE" ]; then + export TTS_SERVICE="local/kokoro" + echo "✅ Using default TTS_SERVICE: local/kokoro" +fi + +if [ -z "$STT_SERVICE" ]; then + export STT_SERVICE="local/base" + echo "✅ Using default STT_SERVICE: local/base" +fi + # ================================================ # Initialize PostgreSQL if needed # ================================================ @@ -18,7 +52,8 @@ if [ ! -f /data/postgres/PG_VERSION ]; then chown -R postgres:postgres /data/postgres chmod 700 /data/postgres - su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres" + # Initialize with UTF8 encoding (required for proper text handling) + su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres --encoding=UTF8 --locale=C.UTF-8" # Configure PostgreSQL for connections echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf @@ -104,6 +139,8 @@ echo " Backend API: http://localhost:8000" echo " API Docs: http://localhost:8000/docs" echo " Auth Type: ${AUTH_TYPE:-LOCAL}" echo " ETL Service: ${ETL_SERVICE:-DOCLING}" +echo " TTS Service: ${TTS_SERVICE}" +echo " STT Service: ${STT_SERVICE}" echo "===========================================" echo "" @@ -111,5 +148,5 @@ echo "" # Start Supervisor (manages all services) # ================================================ echo "🚀 Starting all services..." -exec /usr/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf +exec /usr/local/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf diff --git a/scripts/docker/supervisor-allinone.conf b/scripts/docker/supervisor-allinone.conf index 15685592a..6cada0dc2 100644 --- a/scripts/docker/supervisor-allinone.conf +++ b/scripts/docker/supervisor-allinone.conf @@ -1,8 +1,9 @@ [supervisord] nodaemon=true -logfile=/var/log/supervisor/supervisord.log +logfile=/dev/stdout +logfile_maxbytes=0 pidfile=/var/run/supervisord.pid -childlogdir=/var/log/supervisor +loglevel=info user=root [unix_http_server] @@ -22,8 +23,10 @@ user=postgres autostart=true autorestart=true priority=10 -stdout_logfile=/var/log/supervisor/postgresql.log -stderr_logfile=/var/log/supervisor/postgresql-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PGDATA="/data/postgres" # Redis @@ -32,8 +35,10 @@ command=/usr/bin/redis-server --dir /data/redis --appendonly yes autostart=true autorestart=true priority=20 -stdout_logfile=/var/log/supervisor/redis.log -stderr_logfile=/var/log/supervisor/redis-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 # Backend API [program:backend] @@ -44,8 +49,10 @@ autorestart=true priority=30 startsecs=10 startretries=3 -stdout_logfile=/var/log/supervisor/backend.log -stderr_logfile=/var/log/supervisor/backend-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1" # Celery Worker @@ -57,8 +64,10 @@ autorestart=true priority=40 startsecs=15 startretries=3 -stdout_logfile=/var/log/supervisor/celery-worker.log -stderr_logfile=/var/log/supervisor/celery-worker-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend" # Celery Beat (scheduler) @@ -70,8 +79,10 @@ autorestart=true priority=50 startsecs=20 startretries=3 -stdout_logfile=/var/log/supervisor/celery-beat.log -stderr_logfile=/var/log/supervisor/celery-beat-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=PYTHONPATH="/app/backend" # Frontend @@ -83,8 +94,10 @@ autorestart=true priority=60 startsecs=5 startretries=3 -stdout_logfile=/var/log/supervisor/frontend.log -stderr_logfile=/var/log/supervisor/frontend-error.log +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0" # Process Groups diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index e4ae03e92..32532725b 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -29,7 +29,6 @@ Make sure to include the `-v surfsense-data:/data` in your Docker command. This ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ --name surfsense \ --restart unless-stopped \ ghcr.io/modsetter/surfsense:latest @@ -38,15 +37,15 @@ docker run -d -p 3000:3000 -p 8000:8000 \ **Windows (PowerShell):** ```powershell -$secretKey = -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | ForEach-Object {[char]$_}) docker run -d -p 3000:3000 -p 8000:8000 ` -v surfsense-data:/data ` - -e SECRET_KEY=$secretKey ` --name surfsense ` --restart unless-stopped ` ghcr.io/modsetter/surfsense:latest ``` +> **Note:** A secure `SECRET_KEY` is automatically generated and persisted in the data volume on first run. + ### With Custom Configuration **Using OpenAI Embeddings:** @@ -54,7 +53,6 @@ docker run -d -p 3000:3000 -p 8000:8000 ` ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ -e OPENAI_API_KEY=your_openai_api_key \ --name surfsense \ @@ -67,7 +65,6 @@ docker run -d -p 3000:3000 -p 8000:8000 \ ```bash docker run -d -p 3000:3000 -p 8000:8000 \ -v surfsense-data:/data \ - -e SECRET_KEY=$(openssl rand -hex 32) \ -e AUTH_TYPE=GOOGLE \ -e GOOGLE_OAUTH_CLIENT_ID=your_client_id \ -e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \ @@ -84,12 +81,11 @@ For easier management with environment files: # Download the quick start compose file curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml -# Create .env file +# Create .env file (optional - for custom configuration) cat > .env << EOF -SECRET_KEY=$(openssl rand -hex 32) -# Add other configuration as needed # EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 # ETL_SERVICE=DOCLING +# SECRET_KEY=your_custom_secret_key # Auto-generated if not set EOF # Start SurfSense @@ -105,12 +101,12 @@ After starting, access SurfSense at: | Variable | Description | Default | |----------|-------------|---------| -| SECRET_KEY | JWT secret key (required) | - | +| SECRET_KEY | JWT secret key (auto-generated if not set) | Auto-generated | | AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL | | EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 | | ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING | | TTS_SERVICE | Text-to-speech for podcasts | local/kokoro | -| STT_SERVICE | Speech-to-text for audio | local/base | +| STT_SERVICE | Speech-to-text for audio (model size: tiny, base, small, medium, large) | local/base | | REGISTRATION_ENABLED | Allow new user registration | TRUE | ### Useful Commands