diff --git a/.cursor/skills/playwright-testing/browser-apis/iframes.md b/.cursor/skills/playwright-testing/browser-apis/iframes.md index 145e050ff..155cc1c1b 100644 --- a/.cursor/skills/playwright-testing/browser-apis/iframes.md +++ b/.cursor/skills/playwright-testing/browser-apis/iframes.md @@ -372,7 +372,7 @@ test("mock iframe response", async ({ page }) => {
Mocked widget content
`, diff --git a/.cursor/skills/playwright-testing/core/locators.md b/.cursor/skills/playwright-testing/core/locators.md index f806635d6..afe3af361 100644 --- a/.cursor/skills/playwright-testing/core/locators.md +++ b/.cursor/skills/playwright-testing/core/locators.md @@ -100,7 +100,7 @@ use: { Usage: ```typescript -// HTML: +// React: page.getByTestId("submit-btn"); ``` diff --git a/.cursor/skills/vercel-react-best-practices/AGENTS.md b/.cursor/skills/vercel-react-best-practices/AGENTS.md index 94c3c8441..2b839ab51 100644 --- a/.cursor/skills/vercel-react-best-practices/AGENTS.md +++ b/.cursor/skills/vercel-react-best-practices/AGENTS.md @@ -549,6 +549,8 @@ Preload heavy bundles before they're needed to reduce perceived latency. **Example: preload on hover/focus** ```tsx +import { Button } from '@/components/ui/button' + function EditorButton({ onClick }: { onClick: () => void }) { const preload = () => { if (typeof window !== 'undefined') { @@ -557,13 +559,13 @@ function EditorButton({ onClick }: { onClick: () => void }) { } return ( - + ) } ``` @@ -1239,11 +1241,12 @@ function StaticContent() { **For mutations:** ```tsx +import { Button } from '@/components/ui/button' import { useSWRMutation } from 'swr/mutation' function UpdateButton() { const { trigger } = useSWRMutation('/api/user', updateUser) - return + return } ``` @@ -1369,6 +1372,8 @@ Don't subscribe to dynamic state (searchParams, localStorage) if you only read i **Incorrect: subscribes to all searchParams changes** ```tsx +import { Button } from '@/components/ui/button' + function ShareButton({ chatId }: { chatId: string }) { const searchParams = useSearchParams() @@ -1377,13 +1382,15 @@ function ShareButton({ chatId }: { chatId: string }) { shareChat(chatId, { ref }) } - return + return } ``` **Correct: reads on demand, no subscription** ```tsx +import { Button } from '@/components/ui/button' + function ShareButton({ chatId }: { chatId: string }) { const handleShare = () => { const params = new URLSearchParams(window.location.search) @@ -1391,7 +1398,7 @@ function ShareButton({ chatId }: { chatId: string }) { shareChat(chatId, { ref }) } - return + return } ``` @@ -1549,6 +1556,8 @@ If a side effect is triggered by a specific user action (submit, click, drag), r **Incorrect: event modeled as state + effect** ```tsx +import { Button } from '@/components/ui/button' + function Form() { const [submitted, setSubmitted] = useState(false) const theme = useContext(ThemeContext) @@ -1560,13 +1569,15 @@ function Form() { } }, [submitted, theme]) - return + return } ``` **Correct: do it in the handler** ```tsx +import { Button } from '@/components/ui/button' + function Form() { const theme = useContext(ThemeContext) @@ -1575,7 +1586,7 @@ function Form() { showToast('Registered', theme) } - return + return } ``` diff --git a/.cursor/skills/vercel-react-best-practices/rules/bundle-preload.md b/.cursor/skills/vercel-react-best-practices/rules/bundle-preload.md index 700050406..0662ef81b 100644 --- a/.cursor/skills/vercel-react-best-practices/rules/bundle-preload.md +++ b/.cursor/skills/vercel-react-best-practices/rules/bundle-preload.md @@ -12,6 +12,8 @@ Preload heavy bundles before they're needed to reduce perceived latency. **Example (preload on hover/focus):** ```tsx +import { Button } from "@/components/ui/button" + function EditorButton({ onClick }: { onClick: () => void }) { const preload = () => { if (typeof window !== 'undefined') { @@ -20,13 +22,13 @@ function EditorButton({ onClick }: { onClick: () => void }) { } return ( - + ) } ``` diff --git a/.cursor/skills/vercel-react-best-practices/rules/client-swr-dedup.md b/.cursor/skills/vercel-react-best-practices/rules/client-swr-dedup.md index 2a430f27f..22d419bca 100644 --- a/.cursor/skills/vercel-react-best-practices/rules/client-swr-dedup.md +++ b/.cursor/skills/vercel-react-best-practices/rules/client-swr-dedup.md @@ -45,11 +45,12 @@ function StaticContent() { **For mutations:** ```tsx +import { Button } from '@/components/ui/button' import { useSWRMutation } from 'swr/mutation' function UpdateButton() { const { trigger } = useSWRMutation('/api/user', updateUser) - return + return } ``` diff --git a/.cursor/skills/vercel-react-best-practices/rules/rerender-defer-reads.md b/.cursor/skills/vercel-react-best-practices/rules/rerender-defer-reads.md index e867c95f0..94410bc5b 100644 --- a/.cursor/skills/vercel-react-best-practices/rules/rerender-defer-reads.md +++ b/.cursor/skills/vercel-react-best-practices/rules/rerender-defer-reads.md @@ -12,6 +12,8 @@ Don't subscribe to dynamic state (searchParams, localStorage) if you only read i **Incorrect (subscribes to all searchParams changes):** ```tsx +import { Button } from '@/components/ui/button' + function ShareButton({ chatId }: { chatId: string }) { const searchParams = useSearchParams() @@ -20,13 +22,15 @@ function ShareButton({ chatId }: { chatId: string }) { shareChat(chatId, { ref }) } - return + return } ``` **Correct (reads on demand, no subscription):** ```tsx +import { Button } from '@/components/ui/button' + function ShareButton({ chatId }: { chatId: string }) { const handleShare = () => { const params = new URLSearchParams(window.location.search) @@ -34,6 +38,6 @@ function ShareButton({ chatId }: { chatId: string }) { shareChat(chatId, { ref }) } - return + return } ``` diff --git a/.cursor/skills/vercel-react-best-practices/rules/rerender-move-effect-to-event.md b/.cursor/skills/vercel-react-best-practices/rules/rerender-move-effect-to-event.md index dd58a1af0..299815d69 100644 --- a/.cursor/skills/vercel-react-best-practices/rules/rerender-move-effect-to-event.md +++ b/.cursor/skills/vercel-react-best-practices/rules/rerender-move-effect-to-event.md @@ -12,6 +12,8 @@ If a side effect is triggered by a specific user action (submit, click, drag), r **Incorrect (event modeled as state + effect):** ```tsx +import { Button } from '@/components/ui/button' + function Form() { const [submitted, setSubmitted] = useState(false) const theme = useContext(ThemeContext) @@ -23,13 +25,15 @@ function Form() { } }, [submitted, theme]) - return + return } ``` **Correct (do it in the handler):** ```tsx +import { Button } from '@/components/ui/button' + function Form() { const theme = useContext(ThemeContext) @@ -38,7 +42,7 @@ function Form() { showToast('Registered', theme) } - return + return } ``` diff --git a/.gitignore b/.gitignore index ac2ff94c9..a99954efe 100644 --- a/.gitignore +++ b/.gitignore @@ -6,16 +6,15 @@ node_modules/ .venv .pnpm-store .DS_Store -deepagents/ debug.log -opencode/ + +references/ +references # Playwright (E2E test artifacts) surfsense_web/playwright/.auth/ surfsense_web/playwright-report/ surfsense_web/test-results/ surfsense_web/blob-report/ -hermes-agent -hermes-agent/ content_research/ diff --git a/VERSION b/VERSION index df5db66fe..b056f4120 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.23 +0.0.24 diff --git a/docker/docker-compose.deps-only.yml b/docker/docker-compose.deps-only.yml index 31dcd8b26..2be0bfe6e 100644 --- a/docker/docker-compose.deps-only.yml +++ b/docker/docker-compose.deps-only.yml @@ -20,6 +20,18 @@ # - Backend .env: SEARXNG_DEFAULT_HOST=http://localhost:${SEARXNG_PORT:-8888} # - Backend .env: CELERY_BROKER_URL / REDIS_APP_URL → redis://localhost:6379/0 # - Web .env: NEXT_PUBLIC_ZERO_CACHE_URL=http://localhost:${ZERO_CACHE_PORT:-4848} +# +# IMPORTANT — schema migrations: +# This compose file does NOT build the backend image and therefore cannot +# run a `migrations` service. You MUST run alembic on the host before +# bringing zero-cache up, or zero-cache will crash-loop with +# `Unknown or invalid publications. Specified: [zero_publication]`. +# +# First-time / after-pull: +# cd surfsense_backend && uv run alembic upgrade head +# +# The other compose files (docker-compose.yml, docker-compose.dev.yml) +# handle this automatically via a dedicated `migrations` service. # ============================================================================= name: surfsense-deps @@ -82,6 +94,10 @@ services: timeout: 5s retries: 5 + # NOTE: zero-cache requires the `zero_publication` Postgres publication to + # exist before it starts. In this deps-only stack there is no backend + # container to run migrations, so you must run `uv run alembic upgrade head` + # from `surfsense_backend/` on the host BEFORE `docker compose up -d`. zero-cache: image: rocicorp/zero:1.4.0 ports: diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 5338a649e..53b8ea1a9 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -34,6 +34,25 @@ services: timeout: 5s retries: 5 + # Short-lived schema runner; see docker/docker-compose.yml `migrations` + # service for the full rationale. Builds from the same backend context as + # the dev backend/celery services. + migrations: + build: *backend-build + env_file: + - ../surfsense_backend/.env + environment: + - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://${DB_USER:-postgres}:${DB_PASSWORD:-postgres}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}} + - PYTHONPATH=/app + - SERVICE_ROLE=migrate + - MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900} + volumes: + - zero_init:/zero-init + depends_on: + db: + condition: service_healthy + restart: "no" + pgadmin: image: dpage/pgadmin4 ports: @@ -111,8 +130,10 @@ services: condition: service_healthy searxng: condition: service_healthy + migrations: + condition: service_completed_successfully healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + test: ["CMD", "curl", "-f", "http://localhost:8000/ready"] interval: 15s timeout: 5s retries: 30 @@ -141,6 +162,8 @@ services: condition: service_healthy redis: condition: service_healthy + migrations: + condition: service_completed_successfully backend: condition: service_healthy @@ -160,6 +183,8 @@ services: condition: service_healthy redis: condition: service_healthy + migrations: + condition: service_completed_successfully celery_worker: condition: service_started @@ -185,8 +210,10 @@ services: extra_hosts: - "host.docker.internal:host-gateway" depends_on: - backend: + db: condition: service_healthy + migrations: + condition: service_completed_successfully environment: - ZERO_UPSTREAM_DB=${ZERO_UPSTREAM_DB:-postgresql://${DB_USER:-postgres}:${DB_PASSWORD:-postgres}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}?sslmode=${DB_SSLMODE:-disable}} - ZERO_CVR_DB=${ZERO_CVR_DB:-postgresql://${DB_USER:-postgres}:${DB_PASSWORD:-postgres}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}?sslmode=${DB_SSLMODE:-disable}} @@ -201,6 +228,12 @@ services: - ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate} volumes: - zero_cache_data:/data + - zero_init:/zero-init + # Wrapper: see docker/docker-compose.yml `zero-cache` for rationale. + entrypoint: ["sh", "-c"] + # Pass the script as a single list element so Compose does not tokenize it. + command: + - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache' restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"] @@ -238,3 +271,5 @@ volumes: name: surfsense-dev-shared-temp zero_cache_data: name: surfsense-dev-zero-cache + zero_init: + name: surfsense-dev-zero-init diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 18147a189..82d77f826 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -27,6 +27,28 @@ services: timeout: 5s retries: 5 + # Short-lived schema runner. Executes `alembic upgrade head` and verifies + # that the `zero_publication` Postgres logical-replication publication + # exists, then exits 0. Downstream services (backend, celery_*, zero-cache) + # gate on this with `condition: service_completed_successfully` so a failed + # migration halts the whole stack instead of silently producing a half-built + # system that crash-loops zero-cache on missing publications. + migrations: + image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} + env_file: + - .env + environment: + DATABASE_URL: ${DATABASE_URL:-postgresql+asyncpg://${DB_USER:-surfsense}:${DB_PASSWORD:-surfsense}@${DB_HOST:-db}:${DB_PORT:-5432}/${DB_NAME:-surfsense}} + PYTHONPATH: /app + SERVICE_ROLE: migrate + MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900} + volumes: + - zero_init:/zero-init + depends_on: + db: + condition: service_healthy + restart: "no" + redis: image: redis:8-alpine volumes: @@ -88,9 +110,11 @@ services: condition: service_healthy searxng: condition: service_healthy + migrations: + condition: service_completed_successfully restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + test: ["CMD", "curl", "-f", "http://localhost:8000/ready"] interval: 15s timeout: 5s retries: 30 @@ -118,6 +142,8 @@ services: condition: service_healthy redis: condition: service_healthy + migrations: + condition: service_completed_successfully backend: condition: service_healthy labels: @@ -140,6 +166,8 @@ services: condition: service_healthy redis: condition: service_healthy + migrations: + condition: service_completed_successfully celery_worker: condition: service_started labels: @@ -182,10 +210,21 @@ services: ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate} volumes: - zero_cache_data:/data + - zero_init:/zero-init + # Wrapper: if the migrations service flagged a publication change via + # /zero-init/needs_reset, wipe the SQLite replica before starting so + # zero-cache does a clean initial sync. Recovers from the half-built + # replica state (`_zero.tableMetadata` missing) caused by earlier crashes. + entrypoint: ["sh", "-c"] + # Pass the script as a single list element so Compose does not tokenize it. + command: + - 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache' restart: unless-stopped depends_on: - backend: + db: condition: service_healthy + migrations: + condition: service_completed_successfully healthcheck: test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"] interval: 10s @@ -221,3 +260,5 @@ volumes: name: surfsense-shared-temp zero_cache_data: name: surfsense-zero-cache + zero_init: + name: surfsense-zero-init diff --git a/docker/scripts/install.ps1 b/docker/scripts/install.ps1 index 0eb3886a2..60c4fd5df 100644 --- a/docker/scripts/install.ps1 +++ b/docker/scripts/install.ps1 @@ -97,6 +97,161 @@ function Wait-ForPostgres { Write-Ok "PostgreSQL is ready." } +# ── Stack health helpers ──────────────────────────────────────────────────── + +function Get-ComposeServices { + Push-Location $InstallDir + try { + $raw = Invoke-NativeSafe { docker compose ps -a --format json 2>$null } + } finally { + Pop-Location + } + if ([string]::IsNullOrWhiteSpace($raw)) { return @() } + + # Compose v2.21+ emits a JSON array; older versions emit one object per line. + try { + $parsed = $raw | ConvertFrom-Json + if ($parsed -is [System.Collections.IEnumerable] -and -not ($parsed -is [string])) { + return @($parsed) + } + return @($parsed) + } catch { + $services = @() + foreach ($line in ($raw -split "`r?`n")) { + $line = $line.Trim() + if (-not $line) { continue } + try { $services += ($line | ConvertFrom-Json) } catch { } + } + return $services + } +} + +function Wait-StackHealthy { + param([int]$TimeoutSec = 300) + + $deadline = (Get-Date).AddSeconds($TimeoutSec) + $lastReport = "" + + while ((Get-Date) -lt $deadline) { + $services = Get-ComposeServices + if (-not $services -or $services.Count -eq 0) { + Start-Sleep -Seconds 3 + continue + } + + $bad = @() + $waiting = @() + $good = @() + + foreach ($svc in $services) { + $name = $svc.Service + $state = $svc.State + $health = if ($svc.PSObject.Properties.Name -contains 'Health') { $svc.Health } else { '' } + $exit = if ($svc.PSObject.Properties.Name -contains 'ExitCode') { $svc.ExitCode } else { $null } + + if ($name -eq 'migrations') { + if ($state -eq 'exited' -and $exit -eq 0) { $good += $name } + elseif ($state -eq 'exited') { $bad += "${name} (exit=${exit})" } + else { $waiting += "${name} (${state})" } + continue + } + + if ($state -eq 'running') { + if ([string]::IsNullOrEmpty($health) -or $health -eq 'healthy') { + $good += $name + } elseif ($health -eq 'starting') { + $waiting += "${name} (starting)" + } elseif ($health -eq 'unhealthy') { + $bad += "${name} (unhealthy)" + } else { + $waiting += "${name} (${health})" + } + } elseif ($state -eq 'restarting') { + $bad += "${name} (restarting)" + } elseif ($state -eq 'exited') { + $bad += "${name} (exited, code=${exit})" + } else { + $waiting += "${name} (${state})" + } + } + + if ($bad.Count -gt 0) { + return @{ Ok = $false; Reason = 'failure'; Bad = $bad; Waiting = $waiting; Good = $good } + } + if ($waiting.Count -eq 0) { + return @{ Ok = $true; Reason = 'all_healthy'; Good = $good } + } + + $report = "Waiting on: " + ($waiting -join ', ') + if ($report -ne $lastReport) { + Write-Info $report + $lastReport = $report + } + Start-Sleep -Seconds 5 + } + + return @{ Ok = $false; Reason = 'timeout'; Bad = $bad; Waiting = $waiting; Good = $good } +} + +function Test-StaleZeroCacheVolume { + $raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null } + if ([string]::IsNullOrWhiteSpace($raw)) { return $false } + $names = $raw -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ } + $hasZeroCache = $names -contains 'surfsense-zero-cache' + $hasZeroInit = $names -contains 'surfsense-zero-init' + # Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init. + # Such a volume may hold a half-initialized SQLite replica from an earlier + # crash-loop. Wiping it forces zero-cache to do a fresh initial sync. + return ($hasZeroCache -and -not $hasZeroInit) +} + +function Invoke-StaleZeroCacheCleanup { + if (-not (Test-StaleZeroCacheVolume)) { return } + + Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that" + Write-Warn "predates the migrations-service fix. It may contain a half-initialized" + Write-Warn "SQLite replica that would block zero-cache from starting." + Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel." + Start-Sleep -Seconds 5 + + Push-Location $InstallDir + Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null + Pop-Location + Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null + Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start." +} + +function Write-Err-NoExit { + param([string]$Message) + Write-Host "[ERROR] $Message" -ForegroundColor Red +} + +function Invoke-StackFailureReport { + param([hashtable]$Result) + + Write-Host "" + Write-Err-NoExit "Stack did not reach a healthy state." + if ($Result.Bad.Count -gt 0) { Write-Host (" Failed: " + ($Result.Bad -join ', ')) } + if ($Result.Waiting.Count -gt 0) { Write-Host (" Stuck: " + ($Result.Waiting -join ', ')) } + + Write-Host "" + Write-Info "Recent logs from migrations / zero-cache / backend:" + Push-Location $InstallDir + try { + Invoke-NativeSafe { docker compose logs --tail=60 migrations zero-cache backend 2>&1 } | Write-Host + } finally { + Pop-Location + } + + Write-Host "" + Write-Host "Recovery hints:" -ForegroundColor Yellow + Write-Host " 1. Inspect migrations: cd $InstallDir; docker compose logs migrations" + Write-Host " 2. Verify publication: cd $InstallDir; docker compose exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'" + Write-Host " 3. Hard reset zero db: cd $InstallDir; docker compose down; docker volume rm surfsense-zero-cache; docker compose up -d" + Write-Host "" + exit 1 +} + # ── Download files ────────────────────────────────────────────────────────── Write-Step "Downloading SurfSense files" @@ -191,6 +346,8 @@ if (-not (Test-Path $envPath)) { # ── Start containers ──────────────────────────────────────────────────────── +Invoke-StaleZeroCacheCleanup + if ($MigrationMode) { $envContent = Get-Content $envPath $DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1 @@ -251,7 +408,13 @@ if ($MigrationMode) { Push-Location $InstallDir Invoke-NativeSafe { docker compose up -d } Pop-Location - Write-Ok "All services started." + Write-Ok "All containers started; waiting for stack to become healthy..." + + $waitResult = Wait-StackHealthy -TimeoutSec 300 + if (-not $waitResult.Ok) { + Invoke-StackFailureReport -Result $waitResult + } + Write-Ok "All services healthy." Remove-Item $KeyFile -ErrorAction SilentlyContinue @@ -260,7 +423,13 @@ if ($MigrationMode) { Push-Location $InstallDir Invoke-NativeSafe { docker compose up -d } Pop-Location - Write-Ok "All services started." + Write-Ok "All containers started; waiting for stack to become healthy..." + + $waitResult = Wait-StackHealthy -TimeoutSec 300 + if (-not $waitResult.Ok) { + Invoke-StackFailureReport -Result $waitResult + } + Write-Ok "All services healthy." } # ── Watchtower (auto-update) ──────────────────────────────────────────────── diff --git a/docker/scripts/install.sh b/docker/scripts/install.sh index fcab4d55a..db81f95eb 100644 --- a/docker/scripts/install.sh +++ b/docker/scripts/install.sh @@ -97,6 +97,163 @@ wait_for_pg() { success "PostgreSQL is ready." } +# ── Stack health helpers ───────────────────────────────────────────────────── + +# Enumerate compose services for project `surfsense` as `service|state|health|exitcode` +# lines. Uses `docker inspect` so we don't depend on `jq`, `python3`, or the +# exact ordering of fields in `docker compose ps --format json` output. +get_compose_services() { + local containers + containers=$(docker ps -a --filter "label=com.docker.compose.project=surfsense" --format '{{.Names}}' 2>/dev/null) || true + [[ -z "$containers" ]] && return 0 + + while IFS= read -r container; do + [[ -z "$container" ]] && continue + local svc state health code + svc=$(docker inspect -f '{{index .Config.Labels "com.docker.compose.service"}}' "$container" 2>/dev/null || echo "") + state=$(docker inspect -f '{{.State.Status}}' "$container" 2>/dev/null || echo "unknown") + health=$(docker inspect -f '{{if .State.Health}}{{.State.Health.Status}}{{end}}' "$container" 2>/dev/null || echo "") + code=$(docker inspect -f '{{.State.ExitCode}}' "$container" 2>/dev/null || echo "") + [[ -z "$svc" ]] && continue + printf '%s|%s|%s|%s\n' "$svc" "$state" "$health" "$code" + done <<< "$containers" +} + +# Globals populated by wait_stack_healthy / consumed by stack_failure_report. +STACK_BAD=() +STACK_WAITING=() +STACK_GOOD=() +STACK_TIMEOUT=false + +wait_stack_healthy() { + local timeout_sec=${1:-300} + local deadline=$(($(date +%s) + timeout_sec)) + local last_report="" + local bad=() + local waiting=() + local good=() + + while [[ $(date +%s) -lt $deadline ]]; do + local lines + lines=$(get_compose_services) + if [[ -z "$lines" ]]; then + sleep 3 + continue + fi + + bad=() + waiting=() + good=() + + while IFS='|' read -r name state health code; do + [[ -z "$name" ]] && continue + if [[ "$name" == "migrations" ]]; then + if [[ "$state" == "exited" && "$code" == "0" ]]; then + good+=("$name") + elif [[ "$state" == "exited" ]]; then + bad+=("${name} (exit=${code})") + else + waiting+=("${name} (${state})") + fi + continue + fi + + if [[ "$state" == "running" ]]; then + if [[ -z "$health" || "$health" == "healthy" ]]; then + good+=("$name") + elif [[ "$health" == "starting" ]]; then + waiting+=("${name} (starting)") + elif [[ "$health" == "unhealthy" ]]; then + bad+=("${name} (unhealthy)") + else + waiting+=("${name} (${health})") + fi + elif [[ "$state" == "restarting" ]]; then + bad+=("${name} (restarting)") + elif [[ "$state" == "exited" ]]; then + bad+=("${name} (exited, code=${code})") + else + waiting+=("${name} (${state})") + fi + done <<< "$lines" + + if (( ${#bad[@]} > 0 )); then + STACK_BAD=("${bad[@]}") + STACK_WAITING=("${waiting[@]}") + STACK_GOOD=("${good[@]}") + return 1 + fi + if (( ${#waiting[@]} == 0 )); then + STACK_GOOD=("${good[@]}") + return 0 + fi + + local report="Waiting on: ${waiting[*]}" + if [[ "$report" != "$last_report" ]]; then + info "$report" + last_report="$report" + fi + sleep 5 + done + + # bad/waiting/good are declared at function scope so referencing them is + # safe even if the polling loop never executed its body. + STACK_BAD=() + [[ ${#bad[@]} -gt 0 ]] && STACK_BAD=("${bad[@]}") + STACK_WAITING=() + [[ ${#waiting[@]} -gt 0 ]] && STACK_WAITING=("${waiting[@]}") + STACK_GOOD=() + [[ ${#good[@]} -gt 0 ]] && STACK_GOOD=("${good[@]}") + STACK_TIMEOUT=true + return 1 +} + +stack_failure_report() { + echo "" + echo -e "\033[31m[ERROR]\033[0m Stack did not reach a healthy state." + if (( ${#STACK_BAD[@]} > 0 )) && [[ -n "${STACK_BAD[0]}" ]]; then + echo " Failed: ${STACK_BAD[*]}" + fi + if (( ${#STACK_WAITING[@]} > 0 )) && [[ -n "${STACK_WAITING[0]}" ]]; then + echo " Stuck: ${STACK_WAITING[*]}" + fi + echo "" + info "Recent logs from migrations / zero-cache / backend:" + (cd "${INSTALL_DIR}" && ${DC} logs --tail=60 migrations zero-cache backend 2>&1) || true + echo "" + echo "Recovery hints:" + echo " 1. Inspect migrations: cd ${INSTALL_DIR} && ${DC} logs migrations" + echo " 2. Verify publication: cd ${INSTALL_DIR} && ${DC} exec db psql -U surfsense -d surfsense -c 'SELECT pubname FROM pg_publication;'" + echo " 3. Hard reset zero db: cd ${INSTALL_DIR} && ${DC} down && docker volume rm surfsense-zero-cache && ${DC} up -d" + echo "" + exit 1 +} + +# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not. +# That signals an install that predates the migrations-service fix; the old +# replica may be half-initialized and would block zero-cache on next start. +test_stale_zero_cache_volume() { + local has_zc has_zi + has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true) + has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true) + [[ -n "$has_zc" && -z "$has_zi" ]] +} + +invoke_stale_zero_cache_cleanup() { + if ! test_stale_zero_cache_volume; then + return 0 + fi + warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that" + warn "predates the migrations-service fix. It may contain a half-initialized" + warn "SQLite replica that would block zero-cache from starting." + warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel." + sleep 5 + + (cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true + docker volume rm surfsense-zero-cache 2>/dev/null || true + success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start." +} + # ── Download files ─────────────────────────────────────────────────────────── step "Downloading SurfSense files" @@ -186,6 +343,8 @@ fi # ── Start containers ───────────────────────────────────────────────────────── +invoke_stale_zero_cache_cleanup + if $MIGRATION_MODE; then # Read DB credentials from .env (fall back to defaults from docker-compose.yml) DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true) @@ -243,7 +402,12 @@ if $MIGRATION_MODE; then step "Starting all SurfSense services" (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null - success "All services started." + success "All containers started; waiting for stack to become healthy..." + + if ! wait_stack_healthy 300; then + stack_failure_report + fi + success "All services healthy." # Key file is no longer needed — SECRET_KEY is now in .env rm -f "${KEY_FILE}" @@ -251,7 +415,12 @@ if $MIGRATION_MODE; then else step "Starting SurfSense" (cd "${INSTALL_DIR}" && ${DC} up -d) < /dev/null - success "All services started." + success "All containers started; waiting for stack to become healthy..." + + if ! wait_stack_healthy 300; then + stack_failure_report + fi + success "All services healthy." fi # ── Watchtower (auto-update) ───────────────────────────────────────────────── diff --git a/surfsense_backend/Dockerfile b/surfsense_backend/Dockerfile index 6e1b2481e..0c783f403 100644 --- a/surfsense_backend/Dockerfile +++ b/surfsense_backend/Dockerfile @@ -167,10 +167,14 @@ COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh RUN dos2unix /app/scripts/docker/entrypoint.sh && chmod +x /app/scripts/docker/entrypoint.sh # SERVICE_ROLE controls which process this container runs: -# api – FastAPI backend only (runs migrations on startup) +# migrate – Run alembic upgrade head, verify zero_publication exists, exit 0. +# Used by the dedicated `migrations` service in docker-compose.yml +# so downstream services gate on `service_completed_successfully`. +# api – FastAPI backend only (does NOT run migrations) # worker – Celery worker only # beat – Celery beat scheduler only -# all – All three (legacy / dev default) +# all – migrations + api + worker + beat (legacy / dev default; +# fails fast on migration error) ENV SERVICE_ROLE=all # Celery worker tuning (only used when SERVICE_ROLE=worker or all) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py index 0d702be4c..ccc5c49e2 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py @@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument from app.utils.document_converters import embed_text +from app.utils.surfsense_docs import surfsense_docs_public_url def format_surfsense_docs_results(results: list[tuple]) -> str: @@ -19,13 +20,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str: # Group chunks by document grouped: dict[int, dict] = {} for chunk, doc in results: + public_url = surfsense_docs_public_url(doc.source) if doc.id not in grouped: grouped[doc.id] = { "document_id": f"doc-{doc.id}", "document_type": "SURFSENSE_DOCS", "title": doc.title, - "url": doc.source, - "metadata": {"source": doc.source}, + "url": public_url, + "metadata": {"source": doc.source, "public_url": public_url}, "chunks": [], } grouped[doc.id]["chunks"].append( diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py index 2965f2f02..d8a0efac7 100644 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py @@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker from app.utils.document_converters import embed_text +from app.utils.surfsense_docs import surfsense_docs_public_url def format_surfsense_docs_results(results: list[tuple]) -> str: @@ -40,13 +41,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str: # Group chunks by document grouped: dict[int, dict] = {} for chunk, doc in results: + public_url = surfsense_docs_public_url(doc.source) if doc.id not in grouped: grouped[doc.id] = { "document_id": f"doc-{doc.id}", "document_type": "SURFSENSE_DOCS", "title": doc.title, - "url": doc.source, - "metadata": {"source": doc.source}, + "url": public_url, + "metadata": {"source": doc.source, "public_url": public_url}, "chunks": [], } grouped[doc.id]["chunks"].append( diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 5057e7d00..fc6242643 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -945,6 +945,36 @@ async def health_check(): return {"status": "ok"} +@app.get("/ready", tags=["health"]) +@limiter.exempt +async def readiness_check(): + """Readiness probe. + + Verifies that the schema state required by downstream services is + present. Specifically checks that the ``zero_publication`` Postgres + logical-replication publication exists; without it zero-cache crash-loops + on `Unknown or invalid publications`. + + Returns 200 when ready, 503 otherwise. Used by the docker-compose + backend healthcheck and by ``install.ps1`` / ``install.sh`` post-up + verification. + """ + from sqlalchemy import text + + from app.db import async_session_maker + + async with async_session_maker() as session: + result = await session.execute( + text("SELECT 1 FROM pg_publication WHERE pubname = 'zero_publication'") + ) + if result.first() is None: + raise HTTPException( + status_code=503, + detail="zero_publication missing; run alembic upgrade head", + ) + return {"status": "ready"} + + @app.get("/verify-token") async def authenticated_route( user: User = Depends(current_active_user), diff --git a/surfsense_backend/app/routes/surfsense_docs_routes.py b/surfsense_backend/app/routes/surfsense_docs_routes.py index e1713e8a3..0d5428dec 100644 --- a/surfsense_backend/app/routes/surfsense_docs_routes.py +++ b/surfsense_backend/app/routes/surfsense_docs_routes.py @@ -24,6 +24,7 @@ from app.schemas.surfsense_docs import ( SurfsenseDocsDocumentWithChunksRead, ) from app.users import current_active_user +from app.utils.surfsense_docs import surfsense_docs_public_url router = APIRouter() @@ -76,6 +77,7 @@ async def get_surfsense_doc_by_chunk_id( id=document.id, title=document.title, source=document.source, + public_url=surfsense_docs_public_url(document.source), content=document.content, chunks=[ SurfsenseDocsChunkRead(id=c.id, content=c.content) @@ -146,6 +148,7 @@ async def list_surfsense_docs( id=doc.id, title=doc.title, source=doc.source, + public_url=surfsense_docs_public_url(doc.source), content=doc.content, created_at=doc.created_at, updated_at=doc.updated_at, diff --git a/surfsense_backend/app/schemas/surfsense_docs.py b/surfsense_backend/app/schemas/surfsense_docs.py index ce32c0ef8..3adf25032 100644 --- a/surfsense_backend/app/schemas/surfsense_docs.py +++ b/surfsense_backend/app/schemas/surfsense_docs.py @@ -22,6 +22,7 @@ class SurfsenseDocsDocumentRead(BaseModel): id: int title: str source: str + public_url: str content: str created_at: datetime | None = None updated_at: datetime | None = None @@ -35,6 +36,7 @@ class SurfsenseDocsDocumentWithChunksRead(BaseModel): id: int title: str source: str + public_url: str content: str chunks: list[SurfsenseDocsChunkRead] diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index da84e7350..60c3bd187 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -79,6 +79,7 @@ from app.tasks.chat.streaming.helpers.interrupt_inspector import ( ) from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap +from app.utils.surfsense_docs import surfsense_docs_public_url from app.utils.user_message_multimodal import build_human_message_content _background_tasks: set[asyncio.Task] = set() @@ -214,14 +215,17 @@ def format_mentioned_surfsense_docs_as_context( ) for doc in documents: - metadata_json = json.dumps({"source": doc.source}, ensure_ascii=False) + public_url = surfsense_docs_public_url(doc.source) + metadata_json = json.dumps( + {"source": doc.source, "public_url": public_url}, ensure_ascii=False + ) context_parts.append("{error.title}
{error.message}
- + )} @@ -191,21 +194,23 @@ export function LocalLoginForm() { }`} disabled={isLoggingIn} /> - + - + {authType === "LOCAL" && ( diff --git a/surfsense_web/app/(home)/login/page.tsx b/surfsense_web/app/(home)/login/page.tsx index c336e757c..42a9182e9 100644 --- a/surfsense_web/app/(home)/login/page.tsx +++ b/surfsense_web/app/(home)/login/page.tsx @@ -6,6 +6,7 @@ import { useTranslations } from "next-intl"; import { Suspense, useEffect, useState } from "react"; import { toast } from "sonner"; import { Logo } from "@/components/Logo"; +import { Button } from "@/components/ui/button"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; import { getAuthErrorDetails, shouldRetry } from "@/lib/auth-errors"; import { setRedirectPath } from "@/lib/auth-utils"; @@ -154,10 +155,12 @@ function LoginContent() {{urlError.title}
{urlError.message}
- + )} diff --git a/surfsense_web/app/(home)/register/page.tsx b/surfsense_web/app/(home)/register/page.tsx index 00f142567..1fd1a4ecb 100644 --- a/surfsense_web/app/(home)/register/page.tsx +++ b/surfsense_web/app/(home)/register/page.tsx @@ -9,6 +9,7 @@ import { useEffect, useState } from "react"; import { type ExternalToast, toast } from "sonner"; import { registerMutationAtom } from "@/atoms/auth/auth-mutation.atoms"; import { Logo } from "@/components/Logo"; +import { Button } from "@/components/ui/button"; import { Spinner } from "@/components/ui/spinner"; import { getAuthErrorDetails, isNetworkError, shouldRetry } from "@/lib/auth-errors"; import { getBearerToken } from "@/lib/auth-utils"; @@ -199,11 +200,13 @@ export default function RegisterPage() {{error.title}
{error.message}
- + )} @@ -295,18 +298,18 @@ export default function RegisterPage() { /> - +