SurfSense/surfsense_backend/scripts/docker/entrypoint.sh
Anish Sarkar 4e00f24a03 feat(docker): add ZERO_AUTO_RESET configuration for improved replication safety
- Introduced the ZERO_AUTO_RESET environment variable to enable automatic reset of the SQLite replica in case of replication halts.
- Updated Docker Compose files to include ZERO_AUTO_RESET in service configurations.
- Enhanced documentation to clarify the purpose and usage of the new variable.
2026-06-06 14:21:14 +05:30

160 lines
5.9 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
set -e
# ─────────────────────────────────────────────────────────────
# SERVICE_ROLE controls which process(es) this container runs.
#
# migrate Run `alembic upgrade head`, verify zero_publication,
# then exit 0. Used by the dedicated `migrations` service
# in docker-compose.yml so downstream services can gate
# on `condition: service_completed_successfully`.
# api FastAPI backend only (does NOT run migrations)
# worker Celery worker only
# beat Celery beat scheduler only
# all migrations + api + worker + beat in one container
# (legacy / dev default; fails fast on migration error)
#
# Set SERVICE_ROLE as an environment variable in Coolify for
# each service deployment.
# ─────────────────────────────────────────────────────────────
SERVICE_ROLE="${SERVICE_ROLE:-all}"
echo "Starting SurfSense with SERVICE_ROLE=${SERVICE_ROLE}"
# ── Autoscale defaults (override via env) ────────────────────
# CELERY_MAX_WORKERS max concurrent worker processes
# CELERY_MIN_WORKERS min workers kept warm
# CELERY_QUEUES comma-separated queues to consume
# (empty = all queues for backward compat)
CELERY_MAX_WORKERS="${CELERY_MAX_WORKERS:-10}"
CELERY_MIN_WORKERS="${CELERY_MIN_WORKERS:-2}"
CELERY_MAX_TASKS_PER_CHILD="${CELERY_MAX_TASKS_PER_CHILD:-50}"
CELERY_QUEUES="${CELERY_QUEUES:-}"
# ── Graceful shutdown ────────────────────────────────────────
PIDS=()
cleanup() {
echo "Shutting down services..."
for pid in "${PIDS[@]}"; do
kill -TERM "$pid" 2>/dev/null || true
done
for pid in "${PIDS[@]}"; do
wait "$pid" 2>/dev/null || true
done
exit 0
}
trap cleanup SIGTERM SIGINT
# ── Database migrations (only for migrate / all) ─────────────
# Fail-fast contract:
# - alembic upgrade head must succeed within ${MIGRATION_TIMEOUT:-900}s
# - zero_publication must match the canonical app.zero_publication shape
# Either failure exits non-zero so the dedicated `migrations` compose
# service exits non-zero, halting the rest of the stack instead of
# silently producing a drifted Zero publication.
run_migrations() {
echo "Running database migrations..."
for i in {1..30}; do
if python -c "from app.db import engine; import asyncio; asyncio.run(engine.dispose())" 2>/dev/null; then
echo "Database is ready."
break
fi
echo "Waiting for database... ($i/30)"
sleep 1
done
local timeout_secs="${MIGRATION_TIMEOUT:-900}"
echo "Running alembic upgrade head (timeout=${timeout_secs}s)..."
if ! timeout "${timeout_secs}" alembic upgrade head; then
echo "ERROR: alembic upgrade head failed (or exceeded ${timeout_secs}s timeout)." >&2
echo "Refusing to start. Inspect the error above and re-run." >&2
exit 1
fi
echo "Migrations completed successfully."
echo "Verifying zero_publication matches the canonical shape..."
if ! python -m app.zero_publication --verify; then
echo "ERROR: zero_publication does not match the canonical shape." >&2
echo "Inspect alembic state with:" >&2
echo " docker compose exec db psql -U \"\$DB_USER\" -d \"\$DB_NAME\" -c 'SELECT * FROM alembic_version;'" >&2
exit 1
fi
}
# ── Service starters ─────────────────────────────────────────
start_api() {
echo "Starting FastAPI Backend..."
python main.py &
PIDS+=($!)
echo " FastAPI PID=${PIDS[-1]}"
}
start_worker() {
QUEUE_ARGS=""
if [ -n "${CELERY_QUEUES}" ]; then
QUEUE_ARGS="--queues=${CELERY_QUEUES}"
else
# When no queues specified, consume from the default, connectors, and
# gateway maintenance queues. Without --queues, Celery only consumes
# from the default queue, leaving connector/gateway maintenance tasks stuck.
DEFAULT_Q="${CELERY_TASK_DEFAULT_QUEUE:-surfsense}"
QUEUE_ARGS="--queues=${DEFAULT_Q},${DEFAULT_Q}.connectors,${DEFAULT_Q}.gateway"
fi
echo "Starting Celery Worker (autoscale=${CELERY_MAX_WORKERS},${CELERY_MIN_WORKERS}, max-tasks-per-child=${CELERY_MAX_TASKS_PER_CHILD}, queues=${CELERY_QUEUES:-all})..."
celery -A app.celery_app worker \
--loglevel=info \
--autoscale="${CELERY_MAX_WORKERS},${CELERY_MIN_WORKERS}" \
--max-tasks-per-child="${CELERY_MAX_TASKS_PER_CHILD}" \
--prefetch-multiplier=1 \
-Ofair \
${QUEUE_ARGS} &
PIDS+=($!)
echo " Celery Worker PID=${PIDS[-1]}"
}
start_beat() {
echo "Starting Celery Beat..."
celery -A app.celery_app beat --loglevel=info &
PIDS+=($!)
echo " Celery Beat PID=${PIDS[-1]}"
}
# ── Main: run based on role ──────────────────────────────────
case "${SERVICE_ROLE}" in
migrate)
run_migrations
echo "Migrations complete; exiting cleanly."
exit 0
;;
api)
start_api
;;
worker)
start_worker
;;
beat)
start_beat
;;
all)
run_migrations
start_api
sleep 5
start_worker
sleep 3
start_beat
;;
*)
echo "ERROR: Unknown SERVICE_ROLE '${SERVICE_ROLE}'. Use: migrate, api, worker, beat, or all"
exit 1
;;
esac
echo "All requested services started. PIDs: ${PIDS[*]}"
# Wait for any process to exit
wait -n
# If we get here, one process exited unexpectedly
exit $?