diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..ad6805174 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,98 @@ +# Git +.git +.gitignore +.gitattributes + +# Documentation +*.md +!README.md +docs/ +CONTRIBUTING.md +CODE_OF_CONDUCT.md +LICENSE + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +.cursor/ + +# Node +**/node_modules/ +**/.next/ +**/dist/ +**/.turbo/ +**/.cache/ +**/coverage/ + +# Python +**/__pycache__/ +**/*.pyc +**/*.pyo +**/*.pyd +**/.Python +**/build/ +**/develop-eggs/ +**/downloads/ +**/eggs/ +**/.eggs/ +# Python venv lib folders (but not frontend lib folders) +surfsense_backend/lib/ +surfsense_backend/lib64/ +**/parts/ +**/sdist/ +**/var/ +**/wheels/ +**/*.egg-info/ +**/.installed.cfg +**/*.egg +**/pip-log.txt +**/.tox/ +**/.coverage +**/htmlcov/ +**/.pytest_cache/ +**/nosetests.xml +**/coverage.xml + +# Environment +**/.env +**/.env.* +!**/.env.example +**/*.local + +# Docker +**/Dockerfile +**/docker-compose*.yml +**/.docker/ + +# Testing +**/tests/ +**/test/ +**/__tests__/ +**/*.test.* +**/*.spec.* + +# Logs +**/*.log +**/logs/ + +# Temporary files +**/tmp/ +**/temp/ +**/.tmp/ +**/.temp/ + +# Build artifacts from backend +surfsense_backend/podcasts/ +surfsense_backend/temp_audio/ +surfsense_backend/*.bak +surfsense_backend/*.dat +surfsense_backend/*.dir + +# GitHub +.github/ + +# Browser extension (not needed for main deployment) +surfsense_browser_extension/ + diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index a391ba83c..000000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Docker Publish - -on: - workflow_dispatch: - -jobs: - # build_and_push_backend: - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - # steps: - # - name: Checkout repository - # uses: actions/checkout@v4 - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: 
docker/setup-buildx-action@v3 - - # - name: Log in to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Build and push backend image - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # file: ./surfsense_backend/Dockerfile - # push: true - # tags: ghcr.io/${{ github.repository_owner }}/surfsense_backend:${{ github.sha }} - # platforms: linux/amd64,linux/arm64 - # labels: | - # org.opencontainers.image.source=${{ github.repositoryUrl }} - # org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - # org.opencontainers.image.revision=${{ github.sha }} - - build_and_push_frontend: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push frontend image - uses: docker/build-push-action@v5 - with: - context: ./surfsense_web - file: ./surfsense_web/Dockerfile - push: true - tags: ghcr.io/${{ github.repository_owner }}/surfsense_web:${{ github.sha }} - platforms: linux/amd64,linux/arm64 - labels: | - org.opencontainers.image.source=${{ github.repositoryUrl }} - org.opencontainers.image.created=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.created'] }} - org.opencontainers.image.revision=${{ github.sha }} diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker_build.yaml index de0ecfe02..e8916b47a 100644 --- a/.github/workflows/docker_build.yaml +++ b/.github/workflows/docker_build.yaml @@ -18,39 
+18,30 @@ on: default: '' permissions: - contents: write # Needed for pushing tags - packages: write # Needed for pushing docker images to GHCR + contents: write + packages: write jobs: tag_release: runs-on: ubuntu-latest outputs: - # Define output to pass the tag to the next job new_tag: ${{ steps.tag_version.outputs.next_version }} steps: - name: Checkout code uses: actions/checkout@v4 with: - # Fetch all history and tags to find the latest SemVer tag fetch-depth: 0 - # Checkout the specific branch if provided, otherwise default ref: ${{ github.event.inputs.branch }} - # Token needed to push tags back token: ${{ secrets.GITHUB_TOKEN }} - name: Get latest SemVer tag and calculate next version id: tag_version run: | - # Fetch all tags from remote just in case git fetch --tags - - # Get the latest SemVer tag (handles vX.Y.Z pattern) - # Filters tags, sorts them version-aware, takes the last one LATEST_TAG=$(git tag --list 'v[0-9]*.[0-9]*.[0-9]*' --sort='v:refname' | tail -n 1) if [ -z "$LATEST_TAG" ]; then echo "No previous SemVer tag found. Starting with v0.1.0" - # Determine initial version based on bump type (optional, v0.1.0 is often fine) case "${{ github.event.inputs.bump_type }}" in patch|minor) NEXT_VERSION="v0.1.0" @@ -58,22 +49,18 @@ jobs: major) NEXT_VERSION="v1.0.0" ;; - *) # Should not happen due to 'choice' input, but good practice + *) echo "Invalid bump type: ${{ github.event.inputs.bump_type }}" exit 1 ;; esac else echo "Latest tag found: $LATEST_TAG" - # Remove 'v' prefix for calculation VERSION=${LATEST_TAG#v} - - # Split into parts MAJOR=$(echo $VERSION | cut -d. -f1) MINOR=$(echo $VERSION | cut -d. -f2) PATCH=$(echo $VERSION | cut -d. 
-f3) - # Bump version based on input case "${{ github.event.inputs.bump_type }}" in patch) PATCH=$((PATCH + 1)) @@ -96,12 +83,10 @@ jobs: fi echo "Calculated next version: $NEXT_VERSION" - # Set output for subsequent steps echo "next_version=$NEXT_VERSION" >> $GITHUB_OUTPUT - name: Create and Push Tag run: | - # Configure Git user identity for annotated tag (FIX) git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' @@ -109,74 +94,23 @@ jobs: COMMIT_SHA=$(git rev-parse HEAD) echo "Tagging commit $COMMIT_SHA with $NEXT_TAG" - # Create an annotated tag (recommended) - this requires user.name/email git tag -a "$NEXT_TAG" -m "Release $NEXT_TAG" - - # Push the tag to the remote repository echo "Pushing tag $NEXT_TAG to origin" git push origin "$NEXT_TAG" - name: Verify Tag Push run: | echo "Checking if tag ${{ steps.tag_version.outputs.next_version }} exists remotely..." - # Give remote a second to update sleep 5 git ls-remote --tags origin | grep "refs/tags/${{ steps.tag_version.outputs.next_version }}" || (echo "Tag push verification failed!" && exit 1) echo "Tag successfully pushed." 
- - # build_and_push_backend_image: - # runs-on: ubuntu-latest - # needs: tag_release # Depends on the tag being created successfully - # permissions: - # packages: write # Need permission to write to GHCR - # contents: read # Need permission to read repo contents (checkout) - # steps: - # - name: Checkout code - # uses: actions/checkout@v4 - - # - name: Login to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # registry: ghcr.io - # username: ${{ github.repository_owner }} - # password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Extract metadata (tags, labels) for Docker build - # id: meta - # uses: docker/metadata-action@v5 - # with: - # images: ghcr.io/${{ github.repository_owner }}/surfsense_backend - # tags: | - # # Use the tag generated in the previous job - # type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # # Optionally add 'latest' tag if building from the default branch - # type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - # - name: Build and push surfsense backend - # uses: docker/build-push-action@v5 - # with: - # context: ./surfsense_backend - # push: true - # tags: ${{ steps.meta.outputs.tags }} - # labels: ${{ steps.meta.outputs.labels }} - # platforms: linux/amd64,linux/arm64 - # # Optional: Add build cache for faster builds - # cache-from: type=gha - # cache-to: type=gha,mode=max - - build_and_push_ui_image: + build_and_push: runs-on: ubuntu-latest - needs: tag_release # Depends on the tag being created successfully + needs: tag_release permissions: - packages: write # Need permission to write to GHCR - contents: read # Need permission to read repo contents (checkout) + packages: write + contents: read steps: - name: Checkout code @@ -195,25 
+129,31 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Extract metadata (tags, labels) for Docker build + - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v5 with: - images: ghcr.io/${{ github.repository_owner }}/surfsense_ui + images: ghcr.io/${{ github.repository_owner }}/surfsense tags: | - # Use the tag generated in the previous job type=raw,value=${{ needs.tag_release.outputs.new_tag }} - # Optionally add 'latest' tag if building from the default branch type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) || github.event.inputs.branch == github.event.repository.default_branch }} - - name: Build and push surfsense UI image + - name: Free up disk space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker system prune -af + + - name: Build and push SurfSense image uses: docker/build-push-action@v5 with: - context: ./surfsense_web + context: . + file: ./Dockerfile.allinone push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - platforms: linux/amd64,linux/arm64 - # Optional: Add build cache for faster builds + platforms: linux/amd64 cache-from: type=gha cache-to: type=gha,mode=max diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc5dde3cc..bb3d607c1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,14 +57,14 @@ Want to fix it? Go for it! Just link the issue in your PR. 2. **Choose your setup method**: - **Docker Setup**: Follow the [Docker Setup Guide](./DOCKER_SETUP.md) - - **Manual Setup**: Follow the [Installation Guide](https://www.surfsense.net/docs/) + - **Manual Setup**: Follow the [Installation Guide](https://www.surfsense.com/docs/) 3. 
**Configure services**: - Set up PGVector & PostgreSQL - Configure a file ETL service: `Unstructured.io` or `LlamaIndex` - Add API keys for external services -For detailed setup instructions, refer to our [Installation Guide](https://www.surfsense.net/docs/). +For detailed setup instructions, refer to our [Installation Guide](https://www.surfsense.com/docs/). ## 🏗️ Project Structure @@ -146,7 +146,7 @@ When contributing, please: Stuck? Need clarification? Here's how to get help: 1. **Check existing issues** - your question might already be answered -2. **Search the docs** - [https://www.surfsense.net/docs/](https://www.surfsense.net/docs/) +2. **Search the docs** - [https://www.surfsense.com/docs/](https://www.surfsense.com/docs/) 3. **Ask in Discord** - [https://discord.gg/ejRNvftDp9](https://discord.gg/ejRNvftDp9) 4. **Create an issue** - if it's a bug or feature request diff --git a/Dockerfile.allinone b/Dockerfile.allinone new file mode 100644 index 000000000..0765deb15 --- /dev/null +++ b/Dockerfile.allinone @@ -0,0 +1,244 @@ +# SurfSense All-in-One Docker Image +# This image bundles PostgreSQL+pgvector, Redis, Backend, and Frontend +# Usage: docker run -d -p 3000:3000 -p 8000:8000 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest +# +# Included Services (all run locally by default): +# - PostgreSQL 14 + pgvector (vector database) +# - Redis (task queue) +# - Docling (document processing, CPU-only, OCR disabled) +# - Kokoro TTS (local text-to-speech for podcasts) +# - Faster-Whisper (local speech-to-text for audio files) +# - Playwright Chromium (web scraping) +# +# Note: This is the CPU-only version. A :cuda tagged image with GPU support +# will be available in the future for faster AI inference. 
+ +# ==================== +# Stage 1: Build Frontend +# ==================== +FROM node:20-alpine AS frontend-builder + +WORKDIR /app + +# Install pnpm +RUN corepack enable pnpm + +# Copy package files +COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./ +COPY surfsense_web/source.config.ts ./ +COPY surfsense_web/content ./content + +# Install dependencies +RUN pnpm install --frozen-lockfile + +# Copy source +COPY surfsense_web/ ./ + +# Build args for frontend +ARG NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000 +ARG NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL +ARG NEXT_PUBLIC_ETL_SERVICE=DOCLING + +ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=$NEXT_PUBLIC_FASTAPI_BACKEND_URL +ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=$NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE +ENV NEXT_PUBLIC_ETL_SERVICE=$NEXT_PUBLIC_ETL_SERVICE + +# Build +RUN pnpm run build + +# ==================== +# Stage 2: Runtime Image +# ==================== +FROM ubuntu:22.04 AS runtime + +# Prevent interactive prompts +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + # PostgreSQL + postgresql-14 \ + postgresql-contrib-14 \ + # Build tools for pgvector + build-essential \ + postgresql-server-dev-14 \ + git \ + # Redis + redis-server \ + # Node.js prerequisites + curl \ + ca-certificates \ + gnupg \ + # Backend dependencies + gcc \ + wget \ + unzip \ + dos2unix \ + # For PPAs + software-properties-common \ + # ============================ + # Local TTS (Kokoro) dependencies + # ============================ + espeak-ng \ + libespeak-ng1 \ + # ============================ + # Local STT (Faster-Whisper) dependencies + # ============================ + ffmpeg \ + # ============================ + # Audio processing (soundfile) + # ============================ + libsndfile1 \ + # ============================ + # Image/OpenCV dependencies (for Docling) + # ============================ + libgl1 \ + libglib2.0-0 \ + 
libsm6 \ + libxext6 \ + libxrender1 \ + # ============================ + # Playwright browser dependencies + # ============================ + libnspr4 \ + libnss3 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libxkbcommon0 \ + libatspi2.0-0 \ + libxcomposite1 \ + libxdamage1 \ + libxrandr2 \ + libgbm1 \ + libcairo2 \ + libpango-1.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 20.x (for running frontend) +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs \ + && rm -rf /var/lib/apt/lists/* + +# Install Python 3.12 from deadsnakes PPA +RUN add-apt-repository ppa:deadsnakes/ppa -y \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3.12-dev \ + && rm -rf /var/lib/apt/lists/* + +# Set Python 3.12 as default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 + +# Install pip for Python 3.12 +RUN python3.12 -m ensurepip --upgrade \ + && python3.12 -m pip install --upgrade pip + +# Install supervisor via pip (system package incompatible with Python 3.12) +RUN pip install --no-cache-dir supervisor + +# Build and install pgvector +RUN cd /tmp \ + && git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \ + && cd pgvector \ + && make \ + && make install \ + && rm -rf /tmp/pgvector + +# Update certificates +RUN update-ca-certificates + +# Create data directories +RUN mkdir -p /data/postgres /data/redis /data/surfsense \ + && chown -R postgres:postgres /data/postgres + +# ==================== +# Copy Frontend Build +# ==================== +WORKDIR /app/frontend + +# Copy only the standalone build (not node_modules) +COPY --from=frontend-builder /app/.next/standalone ./ +COPY --from=frontend-builder /app/.next/static ./.next/static +COPY --from=frontend-builder /app/public ./public + +# ==================== +# Setup Backend 
+# ==================== +WORKDIR /app/backend + +# Copy backend dependency files +COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./ + +# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed) +RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu + +# Install python dependencies +RUN pip install --no-cache-dir certifi pip-system-certs uv \ + && uv pip install --system --no-cache-dir -e . + +# Set SSL environment variables +RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \ + && echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \ + && echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh + +# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py +# GPU support will be added in a future :cuda tagged image + +# Install Playwright browsers +RUN pip install --no-cache-dir playwright \ + && playwright install chromium \ + && rm -rf /root/.cache/ms-playwright/ffmpeg* + +# Copy backend source +COPY surfsense_backend/ ./ + +# ==================== +# Configuration +# ==================== +WORKDIR /app + +# Copy supervisor configuration +COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf + +# Copy entrypoint script +COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh +RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh + +# PostgreSQL initialization script +COPY scripts/docker/init-postgres.sh /app/init-postgres.sh +RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh + +# Clean up build dependencies to reduce image size +RUN apt-get purge -y build-essential postgresql-server-dev-14 git \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +# Environment variables with defaults +ENV POSTGRES_USER=surfsense +ENV POSTGRES_PASSWORD=surfsense +ENV POSTGRES_DB=surfsense +ENV 
DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense +ENV CELERY_BROKER_URL=redis://localhost:6379/0 +ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0 +ENV PYTHONPATH=/app/backend +ENV NEXT_FRONTEND_URL=http://localhost:3000 +ENV AUTH_TYPE=LOCAL +ENV ETL_SERVICE=DOCLING +ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 + +# Data volume +VOLUME ["/data"] + +# Expose ports +EXPOSE 3000 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ + CMD curl -f http://localhost:3000 || exit 1 + +# Run entrypoint +CMD ["/app/entrypoint.sh"] diff --git a/README.md b/README.md index f97a5813f..e675c45c3 100644 --- a/README.md +++ b/README.md @@ -150,32 +150,88 @@ Check out our public roadmap and contribute your ideas or feedback: ## How to get started? +### Quick Start with Docker 🐳 + +> [!TIP] +> For production deployments, use the full [Docker Compose setup](https://www.surfsense.com/docs/docker-installation) which offers more control and scalability. 
+ +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +**With Custom Configuration (e.g., OpenAI Embeddings):** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: [http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + +**Useful Commands:** + +```bash +docker logs -f surfsense # View logs +docker stop surfsense # Stop +docker start surfsense # Start +docker rm surfsense # Remove (data preserved in volume) +``` + ### Installation Options -SurfSense provides three options to get started: +SurfSense provides multiple options to get started: 1. **[SurfSense Cloud](https://www.surfsense.com/login)** - The easiest way to try SurfSense without any setup. - No installation required - Instant access to all features - Perfect for getting started quickly -2. **[Docker Installation (Recommended for Self-Hosting)](https://www.surfsense.net/docs/docker-installation)** - Easy way to get SurfSense up and running with all dependencies containerized. +2. **Quick Start Docker (Above)** - Single command to get SurfSense running locally. + - All-in-one image with PostgreSQL, Redis, and all services bundled + - Perfect for evaluation, development, and small deployments + - Data persisted via Docker volume + +3. 
**[Docker Compose (Production)](https://www.surfsense.com/docs/docker-installation)** - Full stack deployment with separate services. - Includes pgAdmin for database management through a web UI - Supports environment variable customization via `.env` file - Flexible deployment options (full stack or core services only) - - No need to manually edit configuration files between environments + - Better for production with separate scaling of services -3. **[Manual Installation](https://www.surfsense.net/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. +4. **[Manual Installation](https://www.surfsense.com/docs/manual-installation)** - For users who prefer more control over their setup or need to customize their deployment. Docker and manual installation guides include detailed OS-specific instructions for Windows, macOS, and Linux. -Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.net/docs/) including: -- Auth setup -- **File Processing ETL Service** (choose one): +Before self-hosting installation, make sure to complete the [prerequisite setup steps](https://www.surfsense.com/docs/) including: +- Auth setup (optional - defaults to LOCAL auth) +- **File Processing ETL Service** (optional - defaults to Docling): + - Docling (default, local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) - Unstructured.io API key (supports 34+ formats) - LlamaIndex API key (enhanced parsing, supports 50+ formats) - - Docling (local processing, no API key required, supports PDF, Office docs, images, HTML, CSV) -- Other required API keys +- Other API keys as needed for your use case ## Screenshots diff --git a/README.zh-CN.md b/README.zh-CN.md index 464242a4d..1c3a6b159 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -157,32 +157,88 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7 ## 如何开始? 
+### 使用 Docker 快速开始 🐳 + +> [!TIP] +> 对于生产部署,请使用完整的 [Docker Compose 设置](https://www.surfsense.com/docs/docker-installation),它提供更多控制和可扩展性。 + +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +**使用自定义配置(例如 OpenAI 嵌入):** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +启动后,访问 SurfSense: +- **前端**: [http://localhost:3000](http://localhost:3000) +- **后端 API**: [http://localhost:8000](http://localhost:8000) +- **API 文档**: [http://localhost:8000/docs](http://localhost:8000/docs) + +**常用命令:** + +```bash +docker logs -f surfsense # 查看日志 +docker stop surfsense # 停止 +docker start surfsense # 启动 +docker rm surfsense # 删除(数据保留在卷中) +``` + ### 安装选项 -SurfSense 提供三种入门方式: +SurfSense 提供多种入门方式: 1. **[SurfSense Cloud](https://www.surfsense.com/login)** - 无需任何设置即可试用 SurfSense 的最简单方法。 - 无需安装 - 即时访问所有功能 - 非常适合快速上手 -2. **[Docker 安装(推荐用于自托管)](https://www.surfsense.net/docs/docker-installation)** - 通过容器化所有依赖项,轻松启动和运行 SurfSense。 +2. **快速启动 Docker(上述方法)** - 一条命令即可在本地运行 SurfSense。 + - 一体化镜像,捆绑 PostgreSQL、Redis 和所有服务 + - 非常适合评估、开发和小型部署 + - 数据通过 Docker 卷持久化 + +3. **[Docker Compose(生产环境)](https://www.surfsense.com/docs/docker-installation)** - 使用独立服务进行完整堆栈部署。 - 包含 pgAdmin,通过 Web UI 进行数据库管理 - 支持通过 `.env` 文件自定义环境变量 - 灵活的部署选项(完整堆栈或仅核心服务) - - 无需在环境之间手动编辑配置文件 + - 更适合生产环境,支持独立扩展服务 -3. **[手动安装](https://www.surfsense.net/docs/manual-installation)** - 适合希望对设置有更多控制或需要自定义部署的用户。 +4. 
**[手动安装](https://www.surfsense.com/docs/manual-installation)** - 适合希望对设置有更多控制或需要自定义部署的用户。 Docker 和手动安装指南都包含适用于 Windows、macOS 和 Linux 的详细操作系统特定说明。 -在自托管安装之前,请确保完成[先决条件设置步骤](https://www.surfsense.net/docs/),包括: -- 身份验证设置 -- **文件处理 ETL 服务**(选择其一): +在自托管安装之前,请确保完成[先决条件设置步骤](https://www.surfsense.com/docs/),包括: +- 身份验证设置(可选 - 默认为 LOCAL 身份验证) +- **文件处理 ETL 服务**(可选 - 默认为 Docling): + - Docling(默认,本地处理,无需 API 密钥,支持 PDF、Office 文档、图像、HTML、CSV) - Unstructured.io API 密钥(支持 34+ 种格式) - LlamaIndex API 密钥(增强解析,支持 50+ 种格式) - - Docling(本地处理,无需 API 密钥,支持 PDF、Office 文档、图像、HTML、CSV) -- 其他所需的 API 密钥 +- 其他根据用例需要的 API 密钥 ## 截图 diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml new file mode 100644 index 000000000..ff72618b7 --- /dev/null +++ b/docker-compose.quickstart.yml @@ -0,0 +1,80 @@ +# SurfSense Quick Start Docker Compose +# +# This is a simplified docker-compose for quick local deployment using pre-built images. +# For production or customized deployments, use the main docker-compose.yml +# +# Usage: +# 1. (Optional) Create a .env file with your configuration +# 2. Run: docker compose -f docker-compose.quickstart.yml up -d +# 3. 
Access SurfSense at http://localhost:3000 +# +# All Environment Variables are Optional: +# - SECRET_KEY: JWT secret key (auto-generated and persisted if not set) +# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2) +# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING) +# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro) +# - STT_SERVICE: Speech-to-text service with model size (default: local/base) +# - FIRECRAWL_API_KEY: For web crawling features + +version: "3.8" + +services: + # All-in-one SurfSense container + surfsense: + image: ghcr.io/modsetter/surfsense:latest + container_name: surfsense + ports: + - "${FRONTEND_PORT:-3000}:3000" + - "${BACKEND_PORT:-8000}:8000" + volumes: + - surfsense-data:/data + environment: + # Authentication (auto-generated if not set) + - SECRET_KEY=${SECRET_KEY:-} + + # Auth Configuration + - AUTH_TYPE=${AUTH_TYPE:-LOCAL} + - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} + - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} + + # AI/ML Configuration + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE} + - RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-} + - RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-} + + # Document Processing + - ETL_SERVICE=${ETL_SERVICE:-DOCLING} + - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-} + - LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-} + + # Audio Services + - TTS_SERVICE=${TTS_SERVICE:-local/kokoro} + - TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-} + - STT_SERVICE=${STT_SERVICE:-local/base} + - STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-} + + # Web Crawling + - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-} + + # Optional Features + - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE} + - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m} + + # LangSmith Observability (optional) + - 
LANGSMITH_TRACING=${LANGSMITH_TRACING:-false} + - LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-} + - LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-} + - LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-} + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000 && curl -f http://localhost:8000/docs"] # CMD-SHELL: exec-form CMD has no shell, so "&&" would reach curl as a literal argument + interval: 30s + timeout: 10s + retries: 3 + start_period: 120s + +volumes: + surfsense-data: + name: surfsense-data + diff --git a/scripts/docker/entrypoint-allinone.sh b/scripts/docker/entrypoint-allinone.sh new file mode 100644 index 000000000..427256f6d --- /dev/null +++ b/scripts/docker/entrypoint-allinone.sh @@ -0,0 +1,152 @@ +#!/bin/bash +set -e + +echo "===========================================" +echo " 🏄 SurfSense All-in-One Container" +echo "===========================================" + +# Create log directory +mkdir -p /var/log/supervisor + +# ================================================ +# Ensure data directory exists +# ================================================ +mkdir -p /data + +# ================================================ +# Generate SECRET_KEY if not provided +# ================================================ +if [ -z "$SECRET_KEY" ]; then + # Generate a random secret key and persist it + if [ -f /data/.secret_key ]; then + export SECRET_KEY=$(cat /data/.secret_key) + echo "✅ Using existing SECRET_KEY from persistent storage" + else + export SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_urlsafe(32))") + echo "$SECRET_KEY" > /data/.secret_key + chmod 600 /data/.secret_key + echo "✅ Generated new SECRET_KEY (saved for persistence)" + fi +fi + +# ================================================ +# Set default TTS/STT services if not provided +# ================================================ +if [ -z "$TTS_SERVICE" ]; then + export TTS_SERVICE="local/kokoro" + echo "✅ Using default TTS_SERVICE: local/kokoro" +fi + +if [ -z "$STT_SERVICE" ]; then + export STT_SERVICE="local/base" + echo "✅ 
Using default STT_SERVICE: local/base" +fi + +# ================================================ +# Initialize PostgreSQL if needed +# ================================================ +if [ ! -f /data/postgres/PG_VERSION ]; then + echo "📦 Initializing PostgreSQL database..." + + # Initialize PostgreSQL data directory + chown -R postgres:postgres /data/postgres + chmod 700 /data/postgres + + # Initialize with UTF8 encoding (required for proper text handling) + su - postgres -c "/usr/lib/postgresql/14/bin/initdb -D /data/postgres --encoding=UTF8 --locale=C.UTF-8" + + # Configure PostgreSQL for connections + echo "host all all 0.0.0.0/0 md5" >> /data/postgres/pg_hba.conf + echo "local all all trust" >> /data/postgres/pg_hba.conf + echo "listen_addresses='*'" >> /data/postgres/postgresql.conf + + # Start PostgreSQL temporarily to create database and user + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_init.log start" + + # Wait for PostgreSQL to be ready + sleep 5 + + # Create user and database + su - postgres -c "psql -c \"CREATE USER ${POSTGRES_USER:-surfsense} WITH PASSWORD '${POSTGRES_PASSWORD:-surfsense}' SUPERUSER;\"" + su - postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB:-surfsense} OWNER ${POSTGRES_USER:-surfsense};\"" + + # Enable pgvector extension + su - postgres -c "psql -d ${POSTGRES_DB:-surfsense} -c 'CREATE EXTENSION IF NOT EXISTS vector;'" + + # Stop temporary PostgreSQL + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" + + echo "✅ PostgreSQL initialized successfully" +else + echo "✅ PostgreSQL data directory already exists" +fi + +# ================================================ +# Initialize Redis data directory +# ================================================ +mkdir -p /data/redis +chmod 755 /data/redis +echo "✅ Redis data directory ready" + +# ================================================ +# Copy frontend build to runtime location +# 
================================================ +if [ -d /app/frontend/.next/standalone ]; then + cp -r /app/frontend/.next/standalone/* /app/frontend/ 2>/dev/null || true + cp -r /app/frontend/.next/static /app/frontend/.next/static 2>/dev/null || true +fi + +# ================================================ +# Run database migrations +# ================================================ +run_migrations() { + echo "🔄 Running database migrations..." + + # Start PostgreSQL temporarily for migrations + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres -l /tmp/postgres_migrate.log start" + sleep 5 + + # Start Redis temporarily for migrations (some might need it) + redis-server --dir /data/redis --daemonize yes + sleep 2 + + # Run alembic migrations + cd /app/backend + alembic upgrade head || echo "⚠️ Migrations may have already been applied" + + # Stop temporary services + redis-cli shutdown || true + su - postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /data/postgres stop" + + echo "✅ Database migrations complete" +} + +# Run migrations on first start or when explicitly requested +if [ ! 
-f /data/.migrations_run ] || [ "${FORCE_MIGRATIONS:-false}" = "true" ]; then + run_migrations + touch /data/.migrations_run +fi + +# ================================================ +# Environment Variables Info +# ================================================ +echo "" +echo "===========================================" +echo " 📋 Configuration" +echo "===========================================" +echo " Frontend URL: http://localhost:3000" +echo " Backend API: http://localhost:8000" +echo " API Docs: http://localhost:8000/docs" +echo " Auth Type: ${AUTH_TYPE:-LOCAL}" +echo " ETL Service: ${ETL_SERVICE:-DOCLING}" +echo " TTS Service: ${TTS_SERVICE}" +echo " STT Service: ${STT_SERVICE}" +echo "===========================================" +echo "" + +# ================================================ +# Start Supervisor (manages all services) +# ================================================ +echo "🚀 Starting all services..." +exec /usr/local/bin/supervisord -c /etc/supervisor/conf.d/surfsense.conf + diff --git a/scripts/docker/init-postgres.sh b/scripts/docker/init-postgres.sh new file mode 100644 index 000000000..3d2a15f46 --- /dev/null +++ b/scripts/docker/init-postgres.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# PostgreSQL initialization script for SurfSense +# This script is called during container startup if the database needs initialization + +set -e + +PGDATA=${PGDATA:-/data/postgres} +POSTGRES_USER=${POSTGRES_USER:-surfsense} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-surfsense} +POSTGRES_DB=${POSTGRES_DB:-surfsense} + +echo "Initializing PostgreSQL..." + +# Check if PostgreSQL is already initialized +if [ -f "$PGDATA/PG_VERSION" ]; then + echo "PostgreSQL data directory already exists. Skipping initialization." 
+ exit 0 +fi + +# Initialize the database cluster +/usr/lib/postgresql/14/bin/initdb -D "$PGDATA" --username=postgres + +# Configure PostgreSQL +cat >> "$PGDATA/postgresql.conf" << EOF +listen_addresses = '*' +max_connections = 100 +shared_buffers = 128MB +EOF + +cat >> "$PGDATA/pg_hba.conf" << EOF +# Allow connections from anywhere with password +host all all 0.0.0.0/0 md5 +host all all ::0/0 md5 +EOF + +# Start PostgreSQL temporarily +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" -l /tmp/postgres_init.log start + +# Wait for PostgreSQL to start +sleep 3 + +# Create user and database +psql -U postgres << EOF +CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER; +CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER; +\c $POSTGRES_DB +CREATE EXTENSION IF NOT EXISTS vector; +EOF + +echo "PostgreSQL initialized successfully." + +# Stop PostgreSQL (supervisor will start it) +/usr/lib/postgresql/14/bin/pg_ctl -D "$PGDATA" stop + diff --git a/scripts/docker/supervisor-allinone.conf b/scripts/docker/supervisor-allinone.conf new file mode 100644 index 000000000..6cada0dc2 --- /dev/null +++ b/scripts/docker/supervisor-allinone.conf @@ -0,0 +1,107 @@ +[supervisord] +nodaemon=true +logfile=/dev/stdout +logfile_maxbytes=0 +pidfile=/var/run/supervisord.pid +loglevel=info +user=root + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +# PostgreSQL +[program:postgresql] +command=/usr/lib/postgresql/14/bin/postgres -D /data/postgres +user=postgres +autostart=true +autorestart=true +priority=10 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +environment=PGDATA="/data/postgres" + +# Redis +[program:redis] +command=/usr/bin/redis-server --dir /data/redis --appendonly yes +autostart=true +autorestart=true +priority=20 
+stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +# Backend API +[program:backend] +command=python main.py +directory=/app/backend +autostart=true +autorestart=true +priority=30 +startsecs=10 +startretries=3 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +environment=PYTHONPATH="/app/backend",UVICORN_LOOP="asyncio",UNSTRUCTURED_HAS_PATCHED_LOOP="1" + +# Celery Worker +[program:celery-worker] +command=celery -A app.celery_app worker --loglevel=info --concurrency=2 --pool=solo +directory=/app/backend +autostart=true +autorestart=true +priority=40 +startsecs=15 +startretries=3 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +environment=PYTHONPATH="/app/backend" + +# Celery Beat (scheduler) +[program:celery-beat] +command=celery -A app.celery_app beat --loglevel=info +directory=/app/backend +autostart=true +autorestart=true +priority=50 +startsecs=20 +startretries=3 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +environment=PYTHONPATH="/app/backend" + +# Frontend +[program:frontend] +command=node server.js +directory=/app/frontend +autostart=true +autorestart=true +priority=60 +startsecs=5 +startretries=3 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +environment=NODE_ENV="production",PORT="3000",HOSTNAME="0.0.0.0" + +# Process Groups +[group:surfsense] +programs=postgresql,redis,backend,celery-worker,celery-beat,frontend +priority=999 + diff --git a/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx b/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx index 2c8a7f286..b6deb1c05 100644 --- a/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx +++ b/surfsense_browser_extension/routes/pages/ApiKeyForm.tsx @@ -103,7 +103,7 @@ const ApiKeyForm = () 
=> {

Need an API key?{" "} { className="grid grid-cols-1 md:grid-cols-3 gap-6 mb-10" > router.push(`/dashboard/${searchSpaceId}/team`)} - colorScheme="emerald" - delay={0.7} + buttonText="Start Chatting" + onClick={() => router.push(`/dashboard/${searchSpaceId}/researcher`)} + colorScheme="violet" + delay={0.9} /> { /> router.push(`/dashboard/${searchSpaceId}/researcher`)} - colorScheme="violet" - delay={0.9} + buttonText="Manage Team" + onClick={() => router.push(`/dashboard/${searchSpaceId}/team`)} + colorScheme="emerald" + delay={0.7} /> diff --git a/surfsense_web/app/layout.tsx b/surfsense_web/app/layout.tsx index 23ba616cc..54086194b 100644 --- a/surfsense_web/app/layout.tsx +++ b/surfsense_web/app/layout.tsx @@ -50,12 +50,12 @@ export const metadata: Metadata = { title: "SurfSense – AI Research & Knowledge Management Assistant", description: "Connect your documents and tools like Notion, Slack, GitHub, and more to your private AI assistant. SurfSense offers powerful search, document chat, podcast generation, and RAG APIs to enhance your workflow.", - url: "https://surfsense.net", + url: "https://surfsense.com", siteName: "SurfSense", type: "website", images: [ { - url: "https://surfsense.net/og-image.png", + url: "https://surfsense.com/og-image.png", width: 1200, height: 630, alt: "SurfSense AI Research Assistant", @@ -68,11 +68,11 @@ export const metadata: Metadata = { title: "SurfSense – AI Assistant for Research & Knowledge Management", description: "Have your own NotebookLM or Perplexity, but better. 
SurfSense connects external tools, allows chat with your documents, and generates fast, high-quality podcasts.", - creator: "https://surfsense.net", - site: "https://surfsense.net", + creator: "https://surfsense.com", + site: "https://surfsense.com", images: [ { - url: "https://surfsense.net/og-image-twitter.png", + url: "https://surfsense.com/og-image-twitter.png", width: 1200, height: 630, alt: "SurfSense AI Assistant Preview", diff --git a/surfsense_web/components/onboard/setup-llm-step.tsx b/surfsense_web/components/onboard/setup-llm-step.tsx index 9735061ee..41cc5be99 100644 --- a/surfsense_web/components/onboard/setup-llm-step.tsx +++ b/surfsense_web/components/onboard/setup-llm-step.tsx @@ -521,21 +521,65 @@ export function SetupLLMStep({ handleInputChange("api_key", e.target.value)} required /> + {formData.provider === "OLLAMA" && ( +

+ 💡 Ollama doesn't require authentication — enter any value (e.g., + "ollama") +

+ )}
handleInputChange("api_base", e.target.value)} /> + {/* Ollama-specific help */} + {formData.provider === "OLLAMA" && ( +
+

+ 💡 Ollama API Base URL Examples: +

+
+ + +
+
+ )}
diff --git a/surfsense_web/components/search-space-form.tsx b/surfsense_web/components/search-space-form.tsx index ccb290dc8..d683772ef 100644 --- a/surfsense_web/components/search-space-form.tsx +++ b/surfsense_web/components/search-space-form.tsx @@ -36,7 +36,7 @@ import { cn } from "@/lib/utils"; // Define the form schema with Zod const searchSpaceFormSchema = z.object({ - name: z.string().min(3, "Name must be at least 3 characters"), + name: z.string().min(1, "Name is required"), description: z.string().optional(), }); diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 16bd57e71..3bd871135 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -677,11 +677,18 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { handleInputChange("api_key", e.target.value)} required /> + {formData.provider === "OLLAMA" && ( +

+ 💡 Ollama doesn't require authentication — enter any value (e.g., "ollama") +

+ )}
@@ -718,6 +725,36 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {

)} + {/* Ollama-specific help */} + {formData.provider === "OLLAMA" && ( +
+

💡 Ollama API Base URL Examples:

+
+ + +
+
+ )}
{/* Optional Inference Parameters */} diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index 46ef4128b..32532725b 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -8,7 +8,131 @@ full: true # Docker Installation -This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment. +This guide explains how to run SurfSense using Docker, with options ranging from quick single-command deployment to full production setups. + +## Quick Start with Docker 🐳 + +Get SurfSense running in seconds with a single command: + + +The all-in-one Docker image bundles PostgreSQL (with pgvector), Redis, and all SurfSense services. Perfect for quick evaluation and development. + + + +Make sure to include the `-v surfsense-data:/data` flag in your Docker command. This ensures your database and files are properly persisted. + + +### One-Line Installation + +**Linux/macOS:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**Windows (PowerShell):** + +```powershell +docker run -d -p 3000:3000 -p 8000:8000 ` + -v surfsense-data:/data ` + --name surfsense ` + --restart unless-stopped ` + ghcr.io/modsetter/surfsense:latest +``` + +> **Note:** A secure `SECRET_KEY` is automatically generated and persisted in the data volume on first run. 
+ +### With Custom Configuration + +**Using OpenAI Embeddings:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e EMBEDDING_MODEL=openai://text-embedding-ada-002 \ + -e OPENAI_API_KEY=your_openai_api_key \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +**With Google OAuth:** + +```bash +docker run -d -p 3000:3000 -p 8000:8000 \ + -v surfsense-data:/data \ + -e AUTH_TYPE=GOOGLE \ + -e GOOGLE_OAUTH_CLIENT_ID=your_client_id \ + -e GOOGLE_OAUTH_CLIENT_SECRET=your_client_secret \ + --name surfsense \ + --restart unless-stopped \ + ghcr.io/modsetter/surfsense:latest +``` + +### Quick Start with Docker Compose + +For easier management with environment files: + +```bash +# Download the quick start compose file +curl -o docker-compose.yml https://raw.githubusercontent.com/MODSetter/SurfSense/main/docker-compose.quickstart.yml + +# Create .env file (optional - for custom configuration) +cat > .env << EOF +# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +# ETL_SERVICE=DOCLING +# SECRET_KEY=your_custom_secret_key # Auto-generated if not set +EOF + +# Start SurfSense +docker compose up -d +``` + +After starting, access SurfSense at: +- **Frontend**: [http://localhost:3000](http://localhost:3000) +- **Backend API**: [http://localhost:8000](http://localhost:8000) +- **API Docs**: [http://localhost:8000/docs](http://localhost:8000/docs) + +### Quick Start Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| SECRET_KEY | JWT secret key (auto-generated if not set) | Auto-generated | +| AUTH_TYPE | Authentication: `LOCAL` or `GOOGLE` | LOCAL | +| EMBEDDING_MODEL | Model for embeddings | sentence-transformers/all-MiniLM-L6-v2 | +| ETL_SERVICE | Document parser: `DOCLING`, `UNSTRUCTURED`, `LLAMACLOUD` | DOCLING | +| TTS_SERVICE | Text-to-speech for podcasts | local/kokoro | +| STT_SERVICE | Speech-to-text for audio (model size: tiny, base, 
small, medium, large) | local/base | +| REGISTRATION_ENABLED | Allow new user registration | TRUE | + +### Useful Commands + +```bash +# View logs +docker logs -f surfsense + +# Stop SurfSense +docker stop surfsense + +# Start SurfSense +docker start surfsense + +# Remove container (data preserved in volume) +docker rm surfsense + +# Remove container AND data +docker rm surfsense && docker volume rm surfsense-data +``` + +--- + +## Full Docker Compose Setup (Production) + +For production deployments with separate services and more control, use the full Docker Compose setup below. ## Prerequisites diff --git a/surfsense_web/contracts/enums/llm-providers.ts b/surfsense_web/contracts/enums/llm-providers.ts index cbe33d840..40b7ee2df 100644 --- a/surfsense_web/contracts/enums/llm-providers.ts +++ b/surfsense_web/contracts/enums/llm-providers.ts @@ -109,6 +109,7 @@ export const LLM_PROVIDERS: LLMProvider[] = [ label: "Ollama", example: "ollama/llama3.1, ollama/mistral", description: "Run models locally", + apiBase: "http://localhost:11434", }, { value: "ALIBABA_QWEN",