mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-10 20:35:17 +02:00
feat(docker): add ZERO_AUTO_RESET configuration for improved replication safety
- Introduced the ZERO_AUTO_RESET environment variable to enable automatic reset of the SQLite replica in case of replication halts.
- Updated Docker Compose files to include ZERO_AUTO_RESET in service configurations.
- Enhanced documentation to clarify the purpose and usage of the new variable.
This commit is contained in:
parent
19fabaf011
commit
4e00f24a03
12 changed files with 304 additions and 151 deletions
|
|
@@ -102,6 +102,10 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
|||
# Only change this if you manage publications manually.
|
||||
# ZERO_APP_PUBLICATIONS=zero_publication
|
||||
|
||||
# Keep Zero's documented halt safety net enabled. If replication halts, Zero
|
||||
# can wipe and re-sync its local SQLite replica without touching Postgres.
|
||||
# ZERO_AUTO_RESET=true
|
||||
|
||||
# Sync worker tuning. zero-cache defaults ZERO_NUM_SYNC_WORKERS to the number
|
||||
# of CPU cores, which can exceed the connection pool limits on high-core machines.
|
||||
# Each sync worker needs at least 1 connection from both the UPSTREAM and CVR
|
||||
|
|
|
|||
|
|
@@ -114,6 +114,7 @@ services:
|
|||
- ZERO_REPLICA_FILE=/data/zero.db
|
||||
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
|
||||
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
|
||||
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
|
||||
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
|
||||
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
|
||||
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
|
||||
|
|
@@ -122,11 +123,13 @@ services:
|
|||
volumes:
|
||||
- zero_cache_data:/data
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 300s
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 600s
|
||||
|
||||
# OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
|
||||
# original-file backend. The default filesystem backend needs none of this.
|
||||
|
|
|
|||
|
|
@@ -46,8 +46,6 @@ services:
|
|||
- PYTHONPATH=/app
|
||||
- SERVICE_ROLE=migrate
|
||||
- MIGRATION_TIMEOUT=${MIGRATION_TIMEOUT:-900}
|
||||
volumes:
|
||||
- zero_init:/zero-init
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
@@ -235,6 +233,7 @@ services:
|
|||
- ZERO_REPLICA_FILE=/data/zero.db
|
||||
- ZERO_ADMIN_PASSWORD=${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
|
||||
- ZERO_APP_PUBLICATIONS=${ZERO_APP_PUBLICATIONS:-zero_publication}
|
||||
- ZERO_AUTO_RESET=${ZERO_AUTO_RESET:-true}
|
||||
- ZERO_NUM_SYNC_WORKERS=${ZERO_NUM_SYNC_WORKERS:-4}
|
||||
- ZERO_UPSTREAM_MAX_CONNS=${ZERO_UPSTREAM_MAX_CONNS:-20}
|
||||
- ZERO_CVR_MAX_CONNS=${ZERO_CVR_MAX_CONNS:-30}
|
||||
|
|
@@ -242,18 +241,14 @@ services:
|
|||
- ZERO_MUTATE_URL=${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
|
||||
volumes:
|
||||
- zero_cache_data:/data
|
||||
- zero_init:/zero-init
|
||||
# Wrapper: see docker/docker-compose.yml `zero-cache` for rationale.
|
||||
entrypoint: ["sh", "-c"]
|
||||
# Pass the script as a single list element so Compose does not tokenize it.
|
||||
command:
|
||||
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 300s
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:4848/keepalive"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 600s
|
||||
|
||||
frontend:
|
||||
build:
|
||||
|
|
@@ -285,7 +280,5 @@ volumes:
|
|||
name: surfsense-dev-shared-temp
|
||||
zero_cache_data:
|
||||
name: surfsense-dev-zero-cache
|
||||
zero_init:
|
||||
name: surfsense-dev-zero-init
|
||||
whatsapp_sessions:
|
||||
name: surfsense-dev-whatsapp-sessions
|
||||
|
|
|
|||
|
|
@@ -29,10 +29,9 @@ services:
|
|||
|
||||
# Short-lived schema runner. Executes `alembic upgrade head` and verifies
|
||||
# that the `zero_publication` Postgres logical-replication publication
|
||||
# exists, then exits 0. Downstream services (backend, celery_*, zero-cache)
|
||||
# gate on this with `condition: service_completed_successfully` so a failed
|
||||
# migration halts the whole stack instead of silently producing a half-built
|
||||
# system that crash-loops zero-cache on missing publications.
|
||||
# matches the canonical shape, then exits 0. Downstream services gate on this
|
||||
# with `condition: service_completed_successfully` so a failed migration halts
|
||||
# the whole stack instead of booting zero-cache against a drifted publication.
|
||||
migrations:
|
||||
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
|
||||
env_file:
|
||||
|
|
@@ -42,8 +41,6 @@ services:
|
|||
PYTHONPATH: /app
|
||||
SERVICE_ROLE: migrate
|
||||
MIGRATION_TIMEOUT: ${MIGRATION_TIMEOUT:-900}
|
||||
volumes:
|
||||
- zero_init:/zero-init
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
@@ -231,6 +228,7 @@ services:
|
|||
ZERO_REPLICA_FILE: /data/zero.db
|
||||
ZERO_ADMIN_PASSWORD: ${ZERO_ADMIN_PASSWORD:-surfsense-zero-admin}
|
||||
ZERO_APP_PUBLICATIONS: ${ZERO_APP_PUBLICATIONS:-zero_publication}
|
||||
ZERO_AUTO_RESET: ${ZERO_AUTO_RESET:-true}
|
||||
ZERO_NUM_SYNC_WORKERS: ${ZERO_NUM_SYNC_WORKERS:-4}
|
||||
ZERO_UPSTREAM_MAX_CONNS: ${ZERO_UPSTREAM_MAX_CONNS:-20}
|
||||
ZERO_CVR_MAX_CONNS: ${ZERO_CVR_MAX_CONNS:-30}
|
||||
|
|
@@ -238,16 +236,8 @@ services:
|
|||
ZERO_MUTATE_URL: ${ZERO_MUTATE_URL:-http://frontend:3000/api/zero/mutate}
|
||||
volumes:
|
||||
- zero_cache_data:/data
|
||||
- zero_init:/zero-init
|
||||
# Wrapper: if the migrations service flagged a publication change via
|
||||
# /zero-init/needs_reset, wipe the SQLite replica before starting so
|
||||
# zero-cache does a clean initial sync. Recovers from the half-built
|
||||
# replica state (`_zero.tableMetadata` missing) caused by earlier crashes.
|
||||
entrypoint: ["sh", "-c"]
|
||||
# Pass the script as a single list element so Compose does not tokenize it.
|
||||
command:
|
||||
- 'if [ -f /zero-init/needs_reset ]; then echo "[zero-init] publication change detected; wiping replica file(s) under /data" && rm -f /data/zero.db /data/zero.db-shm /data/zero.db-wal && rm -f /zero-init/needs_reset; fi; exec zero-cache'
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 300s
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
@@ -258,6 +248,7 @@ services:
|
|||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 600s
|
||||
|
||||
frontend:
|
||||
image: ghcr.io/modsetter/surfsense-web:${SURFSENSE_VERSION:-latest}
|
||||
|
|
@@ -289,7 +280,5 @@ volumes:
|
|||
name: surfsense-shared-temp
|
||||
zero_cache_data:
|
||||
name: surfsense-zero-cache
|
||||
zero_init:
|
||||
name: surfsense-zero-init
|
||||
whatsapp_sessions:
|
||||
name: surfsense-whatsapp-sessions
|
||||
|
|
|
|||
|
|
@@ -153,34 +153,6 @@ function Wait-ForPostgres {
|
|||
|
||||
# ── Stack startup helper ────────────────────────────────────────────────────
|
||||
|
||||
function Test-StaleZeroCacheVolume {
|
||||
$raw = Invoke-NativeSafe { docker volume ls --format '{{.Name}}' 2>$null }
|
||||
if ([string]::IsNullOrWhiteSpace($raw)) { return $false }
|
||||
$names = $raw -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }
|
||||
$hasZeroCache = $names -contains 'surfsense-zero-cache'
|
||||
$hasZeroInit = $names -contains 'surfsense-zero-init'
|
||||
# Pre-fix installs created surfsense-zero-cache but never surfsense-zero-init.
|
||||
# Such a volume may hold a half-initialized SQLite replica from an earlier
|
||||
# crash-loop. Wiping it forces zero-cache to do a fresh initial sync.
|
||||
return ($hasZeroCache -and -not $hasZeroInit)
|
||||
}
|
||||
|
||||
function Invoke-StaleZeroCacheCleanup {
|
||||
if (-not (Test-StaleZeroCacheVolume)) { return }
|
||||
|
||||
Write-Warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
|
||||
Write-Warn "predates the migrations-service fix. It may contain a half-initialized"
|
||||
Write-Warn "SQLite replica that would block zero-cache from starting."
|
||||
Write-Warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
|
||||
Start-Sleep -Seconds 5
|
||||
|
||||
Push-Location $InstallDir
|
||||
Invoke-NativeSafe { docker compose down --remove-orphans 2>$null } | Out-Null
|
||||
Pop-Location
|
||||
Invoke-NativeSafe { docker volume rm surfsense-zero-cache 2>$null } | Out-Null
|
||||
Write-Ok "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
|
||||
}
|
||||
|
||||
function Invoke-StackFailureReport {
|
||||
Write-Host ""
|
||||
Write-Host "[ERROR] Stack did not reach a healthy state." -ForegroundColor Red
|
||||
|
|
@@ -443,8 +415,6 @@ if (-not (Test-Path $envPath)) {
|
|||
|
||||
# ── Start containers ────────────────────────────────────────────────────────
|
||||
|
||||
Invoke-StaleZeroCacheCleanup
|
||||
|
||||
if ($MigrationMode) {
|
||||
$envContent = Get-Content $envPath
|
||||
$DbUser = ($envContent | Select-String '^DB_USER=' | ForEach-Object { ($_ -split '=',2)[1].Trim('"') }) | Select-Object -First 1
|
||||
|
|
|
|||
|
|
@@ -189,31 +189,6 @@ compose_up_wait() {
|
|||
fi
|
||||
}
|
||||
|
||||
# True if `surfsense-zero-cache` exists but `surfsense-zero-init` does not.
|
||||
# That signals an install that predates the migrations-service fix; the old
|
||||
# replica may be half-initialized and would block zero-cache on next start.
|
||||
test_stale_zero_cache_volume() {
|
||||
local has_zc has_zi
|
||||
has_zc=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-cache' || true)
|
||||
has_zi=$(docker volume ls --format '{{.Name}}' 2>/dev/null | grep -Fx 'surfsense-zero-init' || true)
|
||||
[[ -n "$has_zc" && -z "$has_zi" ]]
|
||||
}
|
||||
|
||||
invoke_stale_zero_cache_cleanup() {
|
||||
if ! test_stale_zero_cache_volume; then
|
||||
return 0
|
||||
fi
|
||||
warn "Detected pre-existing 'surfsense-zero-cache' volume from an install that"
|
||||
warn "predates the migrations-service fix. It may contain a half-initialized"
|
||||
warn "SQLite replica that would block zero-cache from starting."
|
||||
warn "The volume will be removed in 5 seconds; press Ctrl+C to cancel."
|
||||
sleep 5
|
||||
|
||||
(cd "${INSTALL_DIR}" && ${DC} down --remove-orphans 2>/dev/null) || true
|
||||
docker volume rm surfsense-zero-cache 2>/dev/null || true
|
||||
success "Removed surfsense-zero-cache volume; zero-cache will re-sync on next start."
|
||||
}
|
||||
|
||||
# ── Variant and .env helpers ─────────────────────────────────────────────────
|
||||
|
||||
set_env_value() {
|
||||
|
|
@@ -448,8 +423,6 @@ fi
|
|||
|
||||
# ── Start containers ─────────────────────────────────────────────────────────
|
||||
|
||||
invoke_stale_zero_cache_cleanup
|
||||
|
||||
if $MIGRATION_MODE; then
|
||||
# Read DB credentials from .env (fall back to defaults from docker-compose.yml)
|
||||
DB_USER=$(grep '^DB_USER=' "${INSTALL_DIR}/.env" 2>/dev/null | cut -d= -f2 | tr -d '"' | head -1 || true)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue