mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
feat(docker): add GPU support and enhance installation scripts
- Introduced a new docker-compose.gpu.yml file to define GPU resource reservations for backend services. - Updated .env.example to include GPU-related environment variables and usage instructions. - Enhanced install.ps1 and install.sh scripts to support GPU variant selection and validation for GPU count. - Improved error handling and user feedback for invalid GPU configurations.
This commit is contained in:
parent
7e4077d67a
commit
c5afce3873
4 changed files with 401 additions and 264 deletions
|
|
@ -8,8 +8,14 @@
|
|||
SURFSENSE_VERSION=latest
|
||||
|
||||
# Image variant: empty = CPU (default), "cuda" = CUDA 12.8, "cuda126" = CUDA 12.6.
|
||||
# NOTE: this only selects the GPU-built image. GPU device access lands in Phase 3.
|
||||
# GPU acceleration also requires the NVIDIA Container Toolkit on the host and
|
||||
# the GPU overlay in COMPOSE_FILE. Linux/macOS use ":"; Windows uses ";".
|
||||
# Example Linux/macOS: COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
|
||||
# Example Windows: COMPOSE_FILE=docker-compose.yml;docker-compose.gpu.yml
|
||||
# Use "cuda126" for older NVIDIA driver stacks; use "cuda" for newer drivers.
|
||||
SURFSENSE_VARIANT=
|
||||
# COMPOSE_FILE=docker-compose.yml:docker-compose.gpu.yml
|
||||
# SURFSENSE_GPU_COUNT=1
|
||||
|
||||
# Deployment environment: dev or production
|
||||
SURFSENSE_ENV=production
|
||||
|
|
|
|||
30
docker/docker-compose.gpu.yml
Normal file
30
docker/docker-compose.gpu.yml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# GPU overlay for the SurfSense stack. It carries only the device
# reservations and is meant to be layered on top of docker-compose.yml
# (e.g. through COMPOSE_FILE).
#   SURFSENSE_GPU_DRIVER - device driver name, falls back to "nvidia"
#   SURFSENSE_GPU_COUNT  - number of GPUs to reserve, falls back to 1
services:
  backend:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities: [gpu]

  celery_worker:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities: [gpu]

  celery_beat:
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${SURFSENSE_GPU_DRIVER:-nvidia}
              count: ${SURFSENSE_GPU_COUNT:-1}
              capabilities: [gpu]
|
||||
|
|
@ -7,6 +7,8 @@
|
|||
# To pass flags, save and run locally:
|
||||
# .\install.ps1 -NoWatchtower
|
||||
# .\install.ps1 -WatchtowerInterval 3600
|
||||
# .\install.ps1 -Variant cuda
|
||||
# .\install.ps1 -Variant cuda -GpuCount all
|
||||
#
|
||||
# Handles two cases automatically:
|
||||
# 1. Fresh install — no prior SurfSense data detected
|
||||
|
|
@ -17,7 +19,11 @@
|
|||
|
||||
param(
|
||||
[switch]$NoWatchtower,
|
||||
[int]$WatchtowerInterval = 86400
|
||||
[int]$WatchtowerInterval = 86400,
|
||||
[ValidateSet("cpu", "cuda", "cuda126")]
|
||||
[string]$Variant,
|
||||
[string]$GpuCount,
|
||||
[switch]$Quiet
|
||||
)
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
|
@ -34,6 +40,11 @@ $MigrationMode = $false
|
|||
$SetupWatchtower = -not $NoWatchtower
|
||||
$WatchtowerContainer = "watchtower"
|
||||
|
||||
# Validate -GpuCount up front: it must be a non-negative integer or 'all'.
# PowerShell's -notmatch is case-insensitive, so 'ALL' / 'All' pass this
# check. The value is later written verbatim to SURFSENSE_GPU_COUNT, so it is
# normalised to lowercase here to keep .env limited to the documented
# spellings (install.sh only accepts lowercase 'all').
if ($GpuCount) {
    if ($GpuCount -notmatch '^([0-9]+|all)$') {
        Write-Host "[SurfSense] ERROR: Invalid -GpuCount '$GpuCount'. Use a number or 'all'." -ForegroundColor Red
        exit 1
    }
    $GpuCount = $GpuCount.ToLowerInvariant()
}
|
||||
|
||||
# ── Output helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
function Write-Info { param([string]$Msg) Write-Host "[SurfSense] " -ForegroundColor Cyan -NoNewline; Write-Host $Msg }
|
||||
|
|
@ -97,101 +108,7 @@ function Wait-ForPostgres {
|
|||
Write-Ok "PostgreSQL is ready."
|
||||
}
|
||||
|
||||
# ── Stack health helpers ────────────────────────────────────────────────────
|
||||
|
||||
function Get-ComposeServices {
|
||||
Push-Location $InstallDir
|
||||
try {
|
||||
$raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null }
|
||||
} finally {
|
||||
Pop-Location
|
||||
}
|
||||
if ([string]::IsNullOrWhiteSpace($raw)) { return @() }
|
||||
|
||||
# Compose v2.21+ emits a JSON array; older versions emit one object per line.
|
||||
try {
|
||||
$parsed = $raw | ConvertFrom-Json
|
||||
if ($parsed -is [System.Collections.IEnumerable] -and -not ($parsed -is [string])) {
|
||||
return @($parsed)
|
||||
}
|
||||
return @($parsed)
|
||||
} catch {
|
||||
$services = @()
|
||||
foreach ($line in ($raw -split "`r?`n")) {
|
||||
$line = $line.Trim()
|
||||
if (-not $line) { continue }
|
||||
try { $services += ($line | ConvertFrom-Json) } catch { }
|
||||
}
|
||||
return $services
|
||||
}
|
||||
}
|
||||
|
||||
function Wait-StackHealthy {
|
||||
param([int]$TimeoutSec = 300)
|
||||
|
||||
$deadline = (Get-Date).AddSeconds($TimeoutSec)
|
||||
$lastReport = ""
|
||||
|
||||
while ((Get-Date) -lt $deadline) {
|
||||
$services = Get-ComposeServices
|
||||
if (-not $services -or $services.Count -eq 0) {
|
||||
Start-Sleep -Seconds 3
|
||||
continue
|
||||
}
|
||||
|
||||
$bad = @()
|
||||
$waiting = @()
|
||||
$good = @()
|
||||
|
||||
foreach ($svc in $services) {
|
||||
$name = $svc.Service
|
||||
$state = $svc.State
|
||||
$health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' }
|
||||
$exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null }
|
||||
|
||||
if ($name -eq 'migrations') {
|
||||
if ($state -eq 'exited' -and $exit -eq 0) { $good += $name }
|
||||
elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" }
|
||||
else { $waiting += "${name} (${state})" }
|
||||
continue
|
||||
}
|
||||
|
||||
if ($state -eq 'running') {
|
||||
if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') {
|
||||
$good += $name
|
||||
} elseif ($health -eq 'starting') {
|
||||
$waiting += "${name} (starting)"
|
||||
} elseif ($health -eq 'unhealthy') {
|
||||
$bad += "${name} (unhealthy)"
|
||||
} else {
|
||||
$waiting += "${name} (${health})"
|
||||
}
|
||||
} elseif ($state -eq 'restarting') {
|
||||
$bad += "${name} (restarting)"
|
||||
} elseif ($state -eq 'exited') {
|
||||
$bad += "${name} (exited, code=${exit})"
|
||||
} else {
|
||||
$waiting += "${name} (${state})"
|
||||
}
|
||||
}
|
||||
|
||||
if ($bad.Count -gt 0) {
|
||||
return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good }
|
||||
}
|
||||
if ($waiting.Count -eq 0) {
|
||||
return @{ Ok = $true; Reason = 'all_healthy'; Good = $good }
|
||||
}
|
||||
|
||||
$report = "Waiting on: " + ($waiting -join ', ')
|
||||
if ($report -ne $lastReport) {
|
||||
Write-Info $report
|
||||
$lastReport = $report
|
||||
}
|
||||
Start-Sleep -Seconds 5
|
||||
}
|
||||
|
||||
return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good }
|
||||
}
|
||||
# ── Stack startup helper ────────────────────────────────────────────────────
|
||||
|
||||
function Test-StaleZeroCacheVolume {
|
||||
$raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
|
||||
|
|
@ -221,19 +138,9 @@ function Invoke-StaleZeroCacheCleanup {
|
|||
Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
|
||||
}
|
||||
|
||||
function Write-Err-NoExit {
|
||||
param([string]$Message)
|
||||
Write-Host "[ERROR] $Message" -ForegroundColor Red
|
||||
}
|
||||
|
||||
function Invoke-StackFailureReport {
|
||||
param([hashtable]$Result)
|
||||
|
||||
Write-Host ""
|
||||
Write-Err-NoExit "Stack did not reach a healthy state."
|
||||
if ($Result.Bad.Count -gt 0) { Write-Host (" Failed: " + ($Result.Bad -join ', ')) }
|
||||
if ($Result.Waiting.Count -gt 0) { Write-Host (" Stuck: " + ($Result.Waiting -join ', ')) }
|
||||
|
||||
Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
|
||||
Write-Host ""
|
||||
Write-Info "Recent logs from migrations / zero-cache / backend:"
|
||||
Push-Location $InstallDir
|
||||
|
|
@ -247,11 +154,151 @@ function Invoke-StackFailureReport {
|
|||
Write-Host "Recovery hints:" -ForegroundColor Yellow
|
||||
Write-Host " 1. Inspect migrations: cd $InstallDir; docker compose logs migrations"
|
||||
Write-Host " 2. Verify publication: cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
|
||||
Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d"
|
||||
Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d --wait"
|
||||
Write-Host ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Runs `docker compose up -d --wait` from $InstallDir and hands over to
# Invoke-StackFailureReport when the command reports a non-zero exit code.
# The original working directory is restored even if the compose call throws.
function Invoke-ComposeUpWait {
    Push-Location $InstallDir
    try {
        Invoke-NativeSafe { docker compose up -d --wait }
    }
    finally {
        Pop-Location
    }

    if ($LASTEXITCODE -eq 0) { return }
    Invoke-StackFailureReport
}
|
||||
|
||||
# ── Variant and .env helpers ────────────────────────────────────────────────
|
||||
|
||||
# Sets KEY=VALUE in a dotenv-style file.
#   -Path   file to edit; it is created if it does not exist
#   -Key    variable name, matched literally at the start of a line
#   -Value  new value (may be empty)
# Every existing "KEY=" line is rewritten in place; if none exists the
# assignment is appended as a new last line.
function Set-EnvValue {
    param([string]$Path, [string]$Key, [string]$Value)

    $existing = @()
    if (Test-Path $Path) {
        $existing = @(Get-Content $Path)
    }

    $keyPattern = "^$([regex]::Escape($Key))="
    $updated = $false

    # Wrap the loop in @() so the result is always an array. Without it a
    # one-line file yields a single string, and the `+=` below would glue the
    # new assignment onto that line instead of appending a new element.
    $newLines = @(
        foreach ($line in $existing) {
            if ($line -match $keyPattern) {
                $updated = $true
                "$Key=$Value"
            } else {
                $line
            }
        }
    )

    if (-not $updated) {
        $newLines += "$Key=$Value"
    }
    Set-Content -Path $Path -Value $newLines
}
|
||||
|
||||
# Deletes every "KEY=" line from a dotenv-style file.
#   -Path  file to edit; nothing happens when it does not exist
#   -Key   variable name, matched literally at the start of a line
function Remove-EnvValue {
    param([string]$Path, [string]$Key)

    if (Test-Path $Path) {
        $keyPattern = "^$([regex]::Escape($Key))="
        $kept = Get-Content $Path | Where-Object { $_ -notmatch $keyPattern }
        Set-Content -Path $Path -Value $kept
    }
}
|
||||
|
||||
# Returns $true when `nvidia-smi` is on PATH and exits with code 0,
# otherwise $false. All nvidia-smi output is discarded.
function Test-NvidiaGpu {
    $smi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if ($smi) {
        Invoke-NativeSafe { nvidia-smi *>$null } | Out-Null
        return ($LASTEXITCODE -eq 0)
    }
    return $false
}
|
||||
|
||||
# Heuristic check for the NVIDIA container runtime. Returns $true when
# `docker info` output mentions "nvidia", or when either `nvidia-ctk` or
# `nvidia-container-runtime` can be resolved as a command; $false otherwise.
function Test-NvidiaRuntime {
    $dockerInfo = Invoke-NativeSafe { docker info 2>$null }
    if ($dockerInfo -match 'nvidia') { return $true }

    foreach ($tool in 'nvidia-ctk', 'nvidia-container-runtime') {
        if (Get-Command $tool -ErrorAction SilentlyContinue) { return $true }
    }
    return $false
}
|
||||
|
||||
# Picks the CUDA image variant from the driver version reported by nvidia-smi
# (first GPU only). A parsed major version below 570 selects "cuda126"; any
# other outcome, including an unparseable version, selects "cuda".
function Get-RecommendedVariant {
    $driverVersion = Invoke-NativeSafe { nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>$null } |
        Select-Object -First 1

    $driverMajor = if ($driverVersion -match '^(\d+)') { [int]$Matches[1] } else { 0 }

    if ($driverMajor -le 0 -or $driverMajor -ge 570) {
        return "cuda"
    }
    return "cuda126"
}
|
||||
|
||||
# Decides which image variant to install and returns it as a string
# ("cpu", "cuda" or "cuda126"). Reads the script-level $Variant and $Quiet.
#   * An explicit -Variant wins, but is downgraded to "cpu" (with a warning)
#     when no NVIDIA GPU or no NVIDIA container runtime is detected.
#   * Without -Variant, the user is asked interactively only when a GPU and
#     the runtime are both present, -Quiet is not set and the session is
#     interactive; every other path yields "cpu".
function Resolve-Variant {
    $gpuPresent = Test-NvidiaGpu
    $runtimePresent = $false
    $suggested = "cpu"

    # Driver and runtime probing only make sense once a GPU has been found.
    if ($gpuPresent) {
        $suggested = Get-RecommendedVariant
        $runtimePresent = Test-NvidiaRuntime
    }

    if ($Variant) {
        if ($Variant -eq "cpu") {
            return "cpu"
        }
        elseif (-not $gpuPresent) {
            Write-Warn "No NVIDIA GPU detected; falling back to CPU variant."
            return "cpu"
        }
        elseif (-not $runtimePresent) {
            Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant."
            Write-Warn "Install the toolkit before enabling SurfSense GPU acceleration."
            return "cpu"
        }
        return $Variant
    }

    if ($gpuPresent -and -not $runtimePresent) {
        Write-Warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant."
    }

    $canPrompt = $gpuPresent -and $runtimePresent -and -not $Quiet -and [Environment]::UserInteractive
    if (-not $canPrompt) {
        return "cpu"
    }

    Write-Host ""
    Write-Host "SurfSense detected an NVIDIA GPU." -ForegroundColor Cyan
    $choice = Read-Host "Use GPU acceleration? [Y/n]"
    switch ($choice) {
        # Plain Enter accepts the default (GPU).
        ""                            { return $suggested }
        { $_ -match '^(?i)y(es)?$' }  { return $suggested }
        { $_ -match '^(?i)n(o)?$' }   { return "cpu" }
        default {
            Write-Warn "Unrecognized choice '$choice'; using CPU variant."
            return "cpu"
        }
    }

    return "cpu"
}
|
||||
|
||||
# Writes the image-variant settings into a .env file.
#   -Path                 the .env file to edit
#   -SelectedVariant      "cpu", "cuda" or "cuda126"
#   -AllowExistingUpdate  when $false and the file already exists, nothing is
#                         changed and a hint on manual editing is printed
# CPU: SURFSENSE_VARIANT is set to empty and COMPOSE_FILE /
# SURFSENSE_GPU_COUNT are removed. GPU: SURFSENSE_VARIANT and COMPOSE_FILE
# (base file + GPU overlay) are set, and SURFSENSE_GPU_COUNT too when the
# script-level $GpuCount was supplied. COMPOSE_PROFILES is removed in both
# cases.
function Set-VariantEnv {
    param([string]$Path, [string]$SelectedVariant, [bool]$AllowExistingUpdate)

    if ((Test-Path $Path) -and -not $AllowExistingUpdate) {
        Write-Warn ".env already exists - keeping your existing configuration."
        Write-Info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in $Path, then run docker compose up -d --wait."
        return
    }

    if ($SelectedVariant -eq "cpu") {
        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value ""
        Remove-EnvValue -Path $Path -Key "COMPOSE_FILE"
        Remove-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT"
    } else {
        # COMPOSE_FILE entries are separated by ';' on Windows and ':' on
        # Linux/macOS. Use the platform separator so the script also works
        # when run under PowerShell on a non-Windows host; on Windows the
        # result is unchanged.
        $composeFiles = ("docker-compose.yml", "docker-compose.gpu.yml") -join [System.IO.Path]::PathSeparator

        Set-EnvValue -Path $Path -Key "SURFSENSE_VARIANT" -Value $SelectedVariant
        Set-EnvValue -Path $Path -Key "COMPOSE_FILE" -Value $composeFiles
        if ($GpuCount) {
            Set-EnvValue -Path $Path -Key "SURFSENSE_GPU_COUNT" -Value $GpuCount
        }
    }

    Remove-EnvValue -Path $Path -Key "COMPOSE_PROFILES"
}
|
||||
|
||||
$SelectedVariant = Resolve-Variant
|
||||
|
||||
# ── Download files ──────────────────────────────────────────────────────────
|
||||
|
||||
Write-Step "Downloading SurfSense files"
|
||||
|
|
@ -262,6 +309,7 @@ New-Item -ItemType Directory -Path "$InstallDir\searxng" -Force | Out-Null
|
|||
|
||||
$Files = @(
|
||||
@{ Src = "docker/docker-compose.yml"; Dest = "docker-compose.yml" }
|
||||
@{ Src = "docker/docker-compose.gpu.yml"; Dest = "docker-compose.gpu.yml" }
|
||||
@{ Src = "docker/.env.example"; Dest = ".env.example" }
|
||||
@{ Src = "docker/postgresql.conf"; Dest = "postgresql.conf" }
|
||||
@{ Src = "docker/scripts/migrate-database.ps1"; Dest = "scripts/migrate-database.ps1" }
|
||||
|
|
@ -339,9 +387,15 @@ if (-not (Test-Path $envPath)) {
|
|||
$content = $content -replace 'SECRET_KEY=replace_me_with_a_random_string', "SECRET_KEY=$SecretKey"
|
||||
Set-Content -Path $envPath -Value $content -NoNewline
|
||||
|
||||
# The .env file was written just above, so it always exists at this point.
# Passing $false here would make Set-VariantEnv treat it as a pre-existing
# user file, print the "already exists" warning and skip writing the selected
# variant. A freshly generated file must always receive the variant settings.
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
|
||||
Write-Info "Created $envPath"
|
||||
} else {
|
||||
Write-Warn ".env already exists - keeping your existing configuration."
|
||||
if ($PSBoundParameters.ContainsKey('Variant')) {
|
||||
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $true
|
||||
Write-Info "Updated SurfSense image variant in existing $envPath"
|
||||
} else {
|
||||
Set-VariantEnv -Path $envPath -SelectedVariant $SelectedVariant -AllowExistingUpdate $false
|
||||
}
|
||||
}
|
||||
|
||||
# ── Start containers ────────────────────────────────────────────────────────
|
||||
|
|
@ -405,31 +459,15 @@ if ($MigrationMode) {
|
|||
}
|
||||
|
||||
Write-Step "Starting all SurfSense services"
|
||||
Push-Location $InstallDir
|
||||
Invoke-NativeSafe { docker compose up -d }
|
||||
Pop-Location
|
||||
Write-Ok "All containers started; waiting for stack to become healthy..."
|
||||
|
||||
$waitResult = Wait-StackHealthy -TimeoutSec 300
|
||||
if (-not $waitResult.Ok) {
|
||||
Invoke-StackFailureReport -Result $waitResult
|
||||
}
|
||||
Write-Ok "All services healthy."
|
||||
Invoke-ComposeUpWait
|
||||
Write-Ok "All services started and healthy."
|
||||
|
||||
Remove-Item $KeyFile -ErrorAction SilentlyContinue
|
||||
|
||||
} else {
|
||||
Write-Step "Starting SurfSense"
|
||||
Push-Location $InstallDir
|
||||
Invoke-NativeSafe { docker compose up -d }
|
||||
Pop-Location
|
||||
Write-Ok "All containers started; waiting for stack to become healthy..."
|
||||
|
||||
$waitResult = Wait-StackHealthy -TimeoutSec 300
|
||||
if (-not $waitResult.Ok) {
|
||||
Invoke-StackFailureReport -Result $waitResult
|
||||
}
|
||||
Write-Ok "All services healthy."
|
||||
Invoke-ComposeUpWait
|
||||
Write-Ok "All services started and healthy."
|
||||
}
|
||||
|
||||
# ── Watchtower (auto-update) ────────────────────────────────────────────────
|
||||
|
|
@ -461,7 +499,7 @@ if ($SetupWatchtower) {
|
|||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Ok "Watchtower started - labeled SurfSense containers will auto-update."
|
||||
} else {
|
||||
Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d"
|
||||
Write-Warn "Could not start Watchtower. You can set it up manually or use: docker compose pull; docker compose up -d --wait"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
@ -488,6 +526,9 @@ Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
|
|||
|
||||
$versionDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VERSION=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
|
||||
if (-not $versionDisplay) { $versionDisplay = "latest" }
|
||||
$variantDisplay = (Get-Content $envPath | Select-String '^SURFSENSE_VARIANT=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
|
||||
if (-not $variantDisplay) { $variantDisplay = "cpu" }
|
||||
$wtHours = [math]::Floor($WatchtowerInterval / 3600)
|
||||
Write-Host " OSS Alternative to NotebookLM for Teams [$versionDisplay]" -ForegroundColor Yellow
|
||||
Write-Host ("=" * 62) -ForegroundColor Cyan
|
||||
Write-Host ""
|
||||
|
|
@ -497,13 +538,14 @@ Write-Info " Backend: http://localhost:8929"
|
|||
Write-Info " API Docs: http://localhost:8929/docs"
|
||||
Write-Info ""
|
||||
Write-Info " Config: $InstallDir\.env"
|
||||
Write-Info " Variant: $variantDisplay"
|
||||
Write-Info " Logs: cd $InstallDir; docker compose logs -f"
|
||||
Write-Info " Stop: cd $InstallDir; docker compose down"
|
||||
Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d"
|
||||
Write-Info " Update: cd $InstallDir; docker compose pull; docker compose up -d --wait"
|
||||
Write-Info ""
|
||||
|
||||
if ($SetupWatchtower) {
|
||||
Write-Info " Watchtower: auto-updates every ${wtHours}h (stop: docker rm -f $WatchtowerContainer)"
|
||||
Write-Info " Watchtower: auto-updates every ${wtHours}h (disable: docker rm -f $WatchtowerContainer)"
|
||||
} else {
|
||||
Write-Warn " Watchtower skipped. For auto-updates, re-run without -NoWatchtower."
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,11 @@
|
|||
# Flags:
|
||||
# --no-watchtower Skip automatic Watchtower setup
|
||||
# --watchtower-interval=SECS Check interval in seconds (default: 86400 = 24h)
|
||||
# --variant=cpu|cuda|cuda126 Select backend image variant
|
||||
# --gpu Alias for --variant=cuda
|
||||
# --cpu Alias for --variant=cpu
|
||||
# --gpu-count=N|all Number of GPUs to reserve when GPU is enabled
|
||||
# --quiet Skip interactive prompts
|
||||
#
|
||||
# Handles two cases automatically:
|
||||
# 1. Fresh install — no prior SurfSense data detected
|
||||
|
|
@ -35,12 +40,21 @@ MIGRATION_MODE=false
|
|||
SETUP_WATCHTOWER=true
|
||||
WATCHTOWER_INTERVAL=86400
|
||||
WATCHTOWER_CONTAINER="watchtower"
|
||||
REQUESTED_VARIANT=""
|
||||
VARIANT_EXPLICIT=false
|
||||
GPU_COUNT=""
|
||||
QUIET=false
|
||||
|
||||
# ── Parse flags ─────────────────────────────────────────────────────────────
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--no-watchtower) SETUP_WATCHTOWER=false ;;
|
||||
--watchtower-interval=*) WATCHTOWER_INTERVAL="${arg#*=}" ;;
|
||||
--variant=*) REQUESTED_VARIANT="${arg#*=}"; VARIANT_EXPLICIT=true ;;
|
||||
--gpu) REQUESTED_VARIANT="cuda"; VARIANT_EXPLICIT=true ;;
|
||||
--cpu) REQUESTED_VARIANT="cpu"; VARIANT_EXPLICIT=true ;;
|
||||
--gpu-count=*) GPU_COUNT="${arg#*=}" ;;
|
||||
--quiet) QUIET=true ;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
|
@ -57,6 +71,15 @@ warn() { printf "${YELLOW}[SurfSense]${NC} %s\n" "$1"; }
|
|||
error() { printf "${RED}[SurfSense]${NC} ERROR: %s\n" "$1" >&2; exit 1; }
|
||||
step() { printf "\n${BOLD}${CYAN}── %s${NC}\n" "$1"; }
|
||||
|
||||
case "${REQUESTED_VARIANT}" in
|
||||
""|cpu|cuda|cuda126) ;;
|
||||
*) error "Invalid --variant='${REQUESTED_VARIANT}'. Use cpu, cuda, or cuda126." ;;
|
||||
esac
|
||||
|
||||
if [[ -n "${GPU_COUNT}" && ! "${GPU_COUNT}" =~ ^([0-9]+|all)$ ]]; then
|
||||
error "Invalid --gpu-count='${GPU_COUNT}'. Use a number or 'all'."
|
||||
fi
|
||||
|
||||
# ── Pre-flight checks ────────────────────────────────────────────────────────
|
||||
|
||||
step "Checking prerequisites"
|
||||
|
|
@ -97,126 +120,11 @@ wait_for_pg() {
|
|||
success "PostgreSQL is ready."
|
||||
}
|
||||
|
||||
# ── Stack health helpers ─────────────────────────────────────────────────────
|
||||
|
||||
# Enumerate compose services for project `surfsense` as `service|state|health|exitcode`
|
||||
# lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the
|
||||
# exact ordering of fields in `docker compose ps --format json` output.
|
||||
get_compose_services() {
|
||||
local containers
|
||||
containers=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true
|
||||
[[ -z "$containers" ]] && return 0
|
||||
|
||||
while IFS= read -r container; do
|
||||
[[ -z "$container" ]] && continue
|
||||
local svc state health code
|
||||
svc=$(docker inspect -f '{{index .Config.Labels "com.docker.compose.service"}}' "$container" 2>/dev/null || echo "")
|
||||
state=$(docker inspect -f '{{.State.Status}}' "$container" 2>/dev/null || echo "unknown")
|
||||
health=$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{end}}' "$container" 2>/dev/null || echo "")
|
||||
code=$(docker inspect -f '{{.State.ExitCode}}' "$container" 2>/dev/null || echo "")
|
||||
[[ -z "$svc" ]] && continue
|
||||
printf '%s|%s|%s|%s\n' "$svc" "$state" "$health" "$code"
|
||||
done <<< "$containers"
|
||||
}
|
||||
|
||||
# Globals populated by wait_stack_healthy / consumed by stack_failure_report.
|
||||
STACK_BAD=()
|
||||
STACK_WAITING=()
|
||||
STACK_GOOD=()
|
||||
STACK_TIMEOUT=false
|
||||
|
||||
wait_stack_healthy() {
|
||||
local timeout_sec=${1:-300}
|
||||
local deadline=$(($(date +%s) + timeout_sec))
|
||||
local last_report=""
|
||||
local bad=()
|
||||
local waiting=()
|
||||
local good=()
|
||||
|
||||
while [[ $(date +%s) -lt $deadline ]]; do
|
||||
local lines
|
||||
lines=$(get_compose_services)
|
||||
if [[ -z "$lines" ]]; then
|
||||
sleep 3
|
||||
continue
|
||||
fi
|
||||
|
||||
bad=()
|
||||
waiting=()
|
||||
good=()
|
||||
|
||||
while IFS='|' read -r name state health code; do
|
||||
[[ -z "$name" ]] && continue
|
||||
if [[ "$name" == "migrations" ]]; then
|
||||
if [[ "$state" == "exited" && "$code" == "0" ]]; then
|
||||
good+=("$name")
|
||||
elif [[ "$state" == "exited" ]]; then
|
||||
bad+=("${name} (exit=${code})")
|
||||
else
|
||||
waiting+=("${name} (${state})")
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
|
||||
if [[ "$state" == "running" ]]; then
|
||||
if [[ -z "$health" || "$health" == "healthy" ]]; then
|
||||
good+=("$name")
|
||||
elif [[ "$health" == "starting" ]]; then
|
||||
waiting+=("${name} (starting)")
|
||||
elif [[ "$health" == "unhealthy" ]]; then
|
||||
bad+=("${name} (unhealthy)")
|
||||
else
|
||||
waiting+=("${name} (${health})")
|
||||
fi
|
||||
elif [[ "$state" == "restarting" ]]; then
|
||||
bad+=("${name} (restarting)")
|
||||
elif [[ "$state" == "exited" ]]; then
|
||||
bad+=("${name} (exited, code=${code})")
|
||||
else
|
||||
waiting+=("${name} (${state})")
|
||||
fi
|
||||
done <<< "$lines"
|
||||
|
||||
if (( ${#bad[@]} > 0 )); then
|
||||
STACK_BAD=("${bad[@]}")
|
||||
STACK_WAITING=("${waiting[@]}")
|
||||
STACK_GOOD=("${good[@]}")
|
||||
return 1
|
||||
fi
|
||||
if (( ${#waiting[@]} == 0 )); then
|
||||
STACK_GOOD=("${good[@]}")
|
||||
return 0
|
||||
fi
|
||||
|
||||
local report="Waiting on: ${waiting[*]}"
|
||||
if [[ "$report" != "$last_report" ]]; then
|
||||
info "$report"
|
||||
last_report="$report"
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
|
||||
# bad/waiting/good are declared at function scope so referencing them is
|
||||
# safe even if the polling loop never executed its body.
|
||||
STACK_BAD=()
|
||||
[[ ${#bad[@]} -gt 0 ]] && STACK_BAD=("${bad[@]}")
|
||||
STACK_WAITING=()
|
||||
[[ ${#waiting[@]} -gt 0 ]] && STACK_WAITING=("${waiting[@]}")
|
||||
STACK_GOOD=()
|
||||
[[ ${#good[@]} -gt 0 ]] && STACK_GOOD=("${good[@]}")
|
||||
STACK_TIMEOUT=true
|
||||
return 1
|
||||
}
|
||||
# ── Stack startup helper ─────────────────────────────────────────────────────
|
||||
|
||||
stack_failure_report() {
|
||||
echo ""
|
||||
echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state."
|
||||
if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then
|
||||
echo " Failed: ${STACK_BAD[*]}"
|
||||
fi
|
||||
if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then
|
||||
echo " Stuck: ${STACK_WAITING[*]}"
|
||||
fi
|
||||
echo ""
|
||||
info "Recent logs from migrations / zero-cache / backend:"
|
||||
(cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true
|
||||
|
|
@ -224,11 +132,20 @@ stack_failure_report() {
|
|||
echo "Recovery hints:"
|
||||
echo " 1. Inspect migrations: cd ${INSTALL_DIR} && ${DC} logs migrations"
|
||||
echo " 2. Verify publication: cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'"
|
||||
echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d"
|
||||
echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d --wait"
|
||||
echo ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Runs `${DC} up -d --wait` from ${INSTALL_DIR} with stdin detached.
#   $1 (optional) - a single service name; when omitted the whole stack is
#                   brought up.
# Returns the exit status of the compose command.
compose_up_wait() {
  local target="${1:-}"

  if [[ -z "$target" ]]; then
    (cd "${INSTALL_DIR}" && ${DC} up -d --wait) < /dev/null
    return
  fi

  (cd "${INSTALL_DIR}" && ${DC} up -d --wait "$target") < /dev/null
}
|
||||
|
||||
# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not.
|
||||
# That signals an install that predates the migrations-service fix; the old
|
||||
# replica may be half-initialized and would block zero-cache on next start.
|
||||
|
|
@ -254,6 +171,144 @@ invoke_stale_zero_cache_cleanup() {
|
|||
success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
|
||||
}
|
||||
|
||||
# ── Variant and .env helpers ─────────────────────────────────────────────────
|
||||
|
||||
# Sets KEY=VALUE in a dotenv-style file.
#   $1 - file to edit (created when it does not exist)
#   $2 - variable name, matched literally at the start of a line
#   $3 - new value (may be empty)
# Every existing "KEY=" line is rewritten in place. When the key is absent, a
# blank line followed by the assignment is appended. The file is rebuilt in a
# temp file and then moved over the original.
set_env_value() {
  local file="$1"
  local key="$2"
  local value="$3"
  local tmp
  tmp=$(mktemp)

  # Start from an empty file instead of failing when the target is missing.
  [[ -f "$file" ]] || : > "$file"

  # Key and value travel through the environment rather than `awk -v`, which
  # would expand backslash escapes (\t, \n, \\) inside the value. index()
  # compares the prefix literally, so a key is never interpreted as a regex.
  if ! ENV_PREFIX="${key}=" ENV_VALUE="$value" awk '
    BEGIN { prefix = ENVIRON["ENV_PREFIX"]; value = ENVIRON["ENV_VALUE"] }
    index($0, prefix) == 1 { print prefix value; found = 1; next }
    { print }
    END { if (!found) { print ""; print prefix value } }
  ' "$file" > "$tmp"; then
    # Do not leave the temp file behind, and do not clobber the original.
    rm -f "$tmp"
    return 1
  fi

  mv "$tmp" "$file"
}
|
||||
|
||||
# Drops every line that starts with "KEY=" from a dotenv-style file.
#   $1 - file to edit
#   $2 - variable name
# The filtered content is written to a temp file which then replaces the
# original.
remove_env_value() {
  local target="$1"
  local name="$2"
  local scratch
  scratch=$(mktemp)

  awk -v key="$name" '
    BEGIN { prefix = key "=" }
    $0 ~ "^" prefix { next }
    { print }
  ' "$target" > "$scratch"

  mv "$scratch" "$target"
}
|
||||
|
||||
# Prints the part of a dotted version string before the first "."
# (e.g. "570.86.10" -> "570"). Prints nothing for an empty argument.
version_major() {
  local full="$1"
  if [[ -n "$full" ]]; then
    printf '%s\n' "${full%%.*}"
  fi
}
|
||||
|
||||
# Prints the CUDA image variant that fits the installed NVIDIA driver, based
# on the driver version nvidia-smi reports for the first GPU.
#   "cuda126" - numeric driver major below 570
#   "cuda"    - everything else, including an unreadable version
recommend_cuda_variant() {
  local raw_version major
  raw_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -n 1 | tr -d '[:space:]' || true)
  major=$(version_major "$raw_version")

  # CUDA 12.8 generally needs an R570+ driver; older driver stacks get the
  # CUDA 12.6 build as the compatibility fallback.
  if [[ ! "$major" =~ ^[0-9]+$ ]] || [[ "$major" -ge 570 ]]; then
    printf 'cuda'
    return
  fi
  printf 'cuda126'
}
|
||||
|
||||
# Heuristic check for the NVIDIA container runtime. Succeeds when
# `docker info` mentions "nvidia" (case-insensitive) or when `nvidia-ctk` or
# `nvidia-container-runtime` resolves as a command; fails otherwise.
gpu_runtime_available() {
  if docker info 2>/dev/null | grep -qi 'nvidia'; then
    return 0
  fi

  local tool
  for tool in nvidia-ctk nvidia-container-runtime; do
    if command -v "$tool" >/dev/null 2>&1; then
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
# Succeeds only when `nvidia-smi` resolves as a command and a plain
# invocation of it exits successfully. Its output is discarded.
host_has_nvidia_gpu() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    return 1
  fi
  nvidia-smi >/dev/null 2>&1
}
|
||||
|
||||
# Decides which image variant to install and prints it on stdout
# ("cpu", "cuda" or "cuda126"). Warnings go to stderr and the interactive
# prompt goes to /dev/tty, so stdout carries only the result.
# Reads the globals VARIANT_EXPLICIT, REQUESTED_VARIANT and QUIET.
#   * An explicit request wins, but is downgraded to "cpu" when no NVIDIA GPU
#     or no NVIDIA container runtime is detected.
#   * Otherwise the user is asked only when GPU and runtime are present,
#     QUIET is false and /dev/tty is readable and writable; every other path
#     prints "cpu".
resolve_variant() {
  local gpu_found=false
  local runtime_found=false
  local suggested="cpu"

  # Driver and runtime probing only make sense once a GPU has been found.
  if host_has_nvidia_gpu; then
    gpu_found=true
    suggested=$(recommend_cuda_variant)
    if gpu_runtime_available; then
      runtime_found=true
    fi
  fi

  if $VARIANT_EXPLICIT; then
    if [[ "$REQUESTED_VARIANT" == "cpu" ]]; then
      printf 'cpu'
    elif ! $gpu_found; then
      warn "No NVIDIA GPU detected; falling back to CPU variant." >&2
      printf 'cpu'
    elif ! $runtime_found; then
      warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; falling back to CPU variant." >&2
      warn "Install the toolkit before enabling SurfSense GPU acceleration." >&2
      printf 'cpu'
    else
      printf '%s' "$REQUESTED_VARIANT"
    fi
    return 0
  fi

  if $gpu_found && ! $runtime_found; then
    warn "NVIDIA GPU detected, but NVIDIA Container Toolkit was not detected; using CPU variant." >&2
  fi

  # No prompt unless everything needed for GPU mode and for asking is there.
  if ! $gpu_found || ! $runtime_found || $QUIET || [[ ! -r /dev/tty || ! -w /dev/tty ]]; then
    printf 'cpu'
    return
  fi

  local answer
  echo "" > /dev/tty
  printf "${BOLD}${CYAN}SurfSense detected an NVIDIA GPU.${NC}\n" > /dev/tty
  printf "Use GPU acceleration? [Y/n]: " > /dev/tty
  read -r answer < /dev/tty || answer=""

  case "$answer" in
    ""|[Yy]|[Yy][Ee][Ss])
      # Plain Enter accepts the default (GPU).
      printf '%s' "$suggested"
      ;;
    [Nn]|[Nn][Oo])
      printf 'cpu'
      ;;
    *)
      warn "Unrecognized choice '${answer}', using CPU variant." >&2
      printf 'cpu'
      ;;
  esac
  return 0
}
|
||||
|
||||
# Writes the image-variant settings into a .env file.
#   $1 - path of the .env file
#   $2 - variant: "cpu", "cuda" or "cuda126"
#   $3 - "true" to allow editing a file that already exists; with any other
#        value an existing file is left untouched and a hint is printed
# CPU: SURFSENSE_VARIANT is set to empty and COMPOSE_FILE /
# SURFSENSE_GPU_COUNT are removed. GPU: SURFSENSE_VARIANT and COMPOSE_FILE
# (base file + GPU overlay) are set, plus SURFSENSE_GPU_COUNT when the global
# GPU_COUNT is non-empty. COMPOSE_PROFILES is removed in both cases.
apply_variant_env() {
  local env_file="$1"
  local variant="$2"
  local allow_existing_update="$3"

  if [[ "$allow_existing_update" != "true" && -f "$env_file" ]]; then
    warn ".env already exists — keeping your existing configuration."
    info "To change variants later, edit SURFSENSE_VARIANT and COMPOSE_FILE in ${env_file}, then run ${DC} up -d --wait."
    return 0
  fi

  case "$variant" in
    cpu)
      set_env_value "$env_file" "SURFSENSE_VARIANT" ""
      remove_env_value "$env_file" "COMPOSE_FILE"
      remove_env_value "$env_file" "SURFSENSE_GPU_COUNT"
      ;;
    *)
      set_env_value "$env_file" "SURFSENSE_VARIANT" "$variant"
      set_env_value "$env_file" "COMPOSE_FILE" "docker-compose.yml:docker-compose.gpu.yml"
      if [[ -n "$GPU_COUNT" ]]; then
        set_env_value "$env_file" "SURFSENSE_GPU_COUNT" "$GPU_COUNT"
      fi
      ;;
  esac

  remove_env_value "$env_file" "COMPOSE_PROFILES"
}
|
||||
|
||||
SELECTED_VARIANT=$(resolve_variant)
|
||||
|
||||
# ── Download files ───────────────────────────────────────────────────────────
|
||||
|
||||
step "Downloading SurfSense files"
|
||||
|
|
@ -263,6 +318,7 @@ mkdir -p "${INSTALL_DIR}/searxng"
|
|||
|
||||
FILES=(
|
||||
"docker/docker-compose.yml:docker-compose.yml"
|
||||
"docker/docker-compose.gpu.yml:docker-compose.gpu.yml"
|
||||
"docker/.env.example:.env.example"
|
||||
"docker/postgresql.conf:postgresql.conf"
|
||||
"docker/scripts/migrate-database.sh:scripts/migrate-database.sh"
|
||||
|
|
@ -336,9 +392,15 @@ if [ ! -f "${INSTALL_DIR}/.env" ]; then
|
|||
else
|
||||
sed -i "s|SECRET_KEY=replace_me_with_a_random_string|SECRET_KEY=${SECRET_KEY}|" "${INSTALL_DIR}/.env"
|
||||
fi
|
||||
# The .env file was generated just above, so it always exists at this point.
# Passing "false" would make apply_variant_env treat it as a pre-existing
# user file, print the "already exists" warning and skip writing the selected
# variant. A freshly generated file must always receive the variant settings.
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
|
||||
info "Created ${INSTALL_DIR}/.env"
|
||||
else
|
||||
warn ".env already exists — keeping your existing configuration."
|
||||
if $VARIANT_EXPLICIT; then
|
||||
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "true"
|
||||
info "Updated SurfSense image variant in existing ${INSTALL_DIR}/.env"
|
||||
else
|
||||
apply_variant_env "${INSTALL_DIR}/.env" "$SELECTED_VARIANT" "false"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Start containers ─────────────────────────────────────────────────────────
|
||||
|
|
@ -401,26 +463,20 @@ if $MIGRATION_MODE; then
|
|||
fi
|
||||
|
||||
step "Starting all SurfSense services"
|
||||
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
|
||||
success "All containers started; waiting for stack to become healthy..."
|
||||
|
||||
if ! wait_stack_healthy 300; then
|
||||
if ! compose_up_wait; then
|
||||
stack_failure_report
|
||||
fi
|
||||
success "All services healthy."
|
||||
success "All services started and healthy."
|
||||
|
||||
# Key file is no longer needed — SECRET_KEY is now in .env
|
||||
rm -f "${KEY_FILE}"
|
||||
|
||||
else
|
||||
step "Starting SurfSense"
|
||||
(cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null
|
||||
success "All containers started; waiting for stack to become healthy..."
|
||||
|
||||
if ! wait_stack_healthy 300; then
|
||||
if ! compose_up_wait; then
|
||||
stack_failure_report
|
||||
fi
|
||||
success "All services healthy."
|
||||
success "All services started and healthy."
|
||||
fi
|
||||
|
||||
# ── Watchtower (auto-update) ─────────────────────────────────────────────────
|
||||
|
|
@ -445,7 +501,7 @@ if $SETUP_WATCHTOWER; then
|
|||
--label-enable \
|
||||
--interval "${WATCHTOWER_INTERVAL}" >/dev/null 2>&1 < /dev/null \
|
||||
&& success "Watchtower started — labeled SurfSense containers will auto-update." \
|
||||
|| warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d"
|
||||
|| warn "Could not start Watchtower. You can set it up manually or use: docker compose pull && docker compose up -d --wait"
|
||||
fi
|
||||
else
|
||||
info "Skipping Watchtower setup (--no-watchtower flag)."
|
||||
|
|
@ -471,6 +527,8 @@ Y88b d88P Y88b 888 888 888 Y88b d88P Y8b. 888 888 X88 Y8b.
|
|||
EOF
|
||||
_version_display=$(grep '^SURFSENSE_VERSION=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
|
||||
_version_display="${_version_display:-latest}"
|
||||
_variant_display=$(grep '^SURFSENSE_VARIANT=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
|
||||
_variant_display="${_variant_display:-cpu}"
|
||||
printf " OSS Alternative to NotebookLM for Teams ${YELLOW}[%s]${NC}\n" "${_version_display}"
|
||||
printf "${CYAN}══════════════════════════════════════════════════════════════${NC}\n\n"
|
||||
|
||||
|
|
@ -479,13 +537,14 @@ info " Backend: http://localhost:8929"
|
|||
info " API Docs: http://localhost:8929/docs"
|
||||
info ""
|
||||
info " Config: ${INSTALL_DIR}/.env"
|
||||
info " Variant: ${_variant_display}"
|
||||
info " Logs: cd ${INSTALL_DIR} && ${DC} logs -f"
|
||||
info " Stop: cd ${INSTALL_DIR} && ${DC} down"
|
||||
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d"
|
||||
info " Update: cd ${INSTALL_DIR} && ${DC} pull && ${DC} up -d --wait"
|
||||
info ""
|
||||
|
||||
if $SETUP_WATCHTOWER; then
|
||||
info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (stop: docker rm -f ${WATCHTOWER_CONTAINER})"
|
||||
info " Watchtower: auto-updates every $((WATCHTOWER_INTERVAL / 3600))h (disable: docker rm -f ${WATCHTOWER_CONTAINER})"
|
||||
else
|
||||
warn " Watchtower skipped. For auto-updates, re-run without --no-watchtower."
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue