mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
feat: add rolling updates for production deployment (#175)
* feat: rolling update uvicorn workers * script fixes
This commit is contained in:
parent
ad6261333d
commit
aed5a782fb
10 changed files with 785 additions and 497 deletions
|
|
@ -1,42 +1,6 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e # Exit on error
|
||||
|
||||
###############################################################################
|
||||
### ARGUMENT PARSING
|
||||
###############################################################################
|
||||
|
||||
DEV_MODE=false
|
||||
|
||||
show_help() {
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --dev Enable development mode with auto-reload for API changes"
|
||||
echo " --help Show this help message"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 # Start in production mode"
|
||||
echo " $0 --dev # Start in development mode with auto-reload"
|
||||
}
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--dev)
|
||||
DEV_MODE=true
|
||||
shift
|
||||
;;
|
||||
--help|-h)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
show_help
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
###############################################################################
|
||||
### CONFIGURATION
|
||||
###############################################################################
|
||||
|
|
@ -55,16 +19,10 @@ VENV_PATH="$BASE_DIR/venv"
|
|||
|
||||
ARQ_WORKERS=${ARQ_WORKERS:-1}
|
||||
LOG_TO_FILE=${LOG_TO_FILE:-true} # Set to false in Docker to use stdout
|
||||
WAIT_FOR_PROCESSES=${WAIT_FOR_PROCESSES:-false} # Set to true in Docker to keep container alive
|
||||
|
||||
# Log startup
|
||||
cd "$BASE_DIR"
|
||||
if $DEV_MODE; then
|
||||
echo "Starting Dograh Services (DEV MODE) at $(date) in BASE_DIR: ${BASE_DIR}"
|
||||
echo "Auto-reload enabled for api/ directory changes"
|
||||
else
|
||||
echo "Starting Dograh Services at $(date) in BASE_DIR: ${BASE_DIR}"
|
||||
fi
|
||||
echo "Starting Dograh Services at $(date) in BASE_DIR: ${BASE_DIR}"
|
||||
|
||||
###############################################################################
|
||||
### 1) Load environment variables
|
||||
|
|
@ -75,10 +33,34 @@ if [[ -f "$ENV_FILE" ]]; then
|
|||
set -a && . "$ENV_FILE" && set +a
|
||||
fi
|
||||
|
||||
FASTAPI_PORT=${FASTAPI_PORT:-8000}
|
||||
UVICORN_BASE_PORT=${UVICORN_BASE_PORT:-8000}
|
||||
CPU_CORES=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
FASTAPI_WORKERS=${FASTAPI_WORKERS:-$CPU_CORES}
|
||||
|
||||
###############################################################################
|
||||
### 1b) Safety check — refuse to start over running services
|
||||
###############################################################################
|
||||
|
||||
if [[ -d "$RUN_DIR" ]]; then
|
||||
live_count=0
|
||||
for pidfile in "$RUN_DIR"/*.pid; do
|
||||
[[ -e "$pidfile" ]] || continue
|
||||
pid=$(<"$pidfile")
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
live_count=$((live_count + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ $live_count -gt 0 ]]; then
|
||||
echo "ERROR: $live_count service(s) are still running."
|
||||
echo ""
|
||||
echo " Stop first: ./scripts/stop_services.sh"
|
||||
echo " For a zero-downtime deploy, use: ./scripts/rolling_update.sh"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
### 2) Define services
|
||||
###############################################################################
|
||||
|
|
@ -88,24 +70,20 @@ FASTAPI_WORKERS=${FASTAPI_WORKERS:-$CPU_CORES}
|
|||
SERVICE_NAMES=(
|
||||
"ari_manager"
|
||||
"campaign_orchestrator"
|
||||
"uvicorn"
|
||||
)
|
||||
|
||||
# Build uvicorn command based on mode
|
||||
if $DEV_MODE; then
|
||||
# Dev mode: single worker with auto-reload (--reload is incompatible with --workers > 1)
|
||||
UVICORN_CMD="uvicorn api.app:app --host 0.0.0.0 --port $FASTAPI_PORT --reload --reload-dir api"
|
||||
else
|
||||
# Production mode: multiple workers, no reload
|
||||
UVICORN_CMD="uvicorn api.app:app --host 0.0.0.0 --port $FASTAPI_PORT --workers $FASTAPI_WORKERS"
|
||||
fi
|
||||
|
||||
SERVICE_COMMANDS=(
|
||||
"python -m api.services.telephony.ari_manager"
|
||||
"python -m api.services.campaign.campaign_orchestrator"
|
||||
"$UVICORN_CMD"
|
||||
)
|
||||
|
||||
# Add uvicorn workers on separate ports (behind nginx least_conn)
|
||||
for ((w=0; w<FASTAPI_WORKERS; w++)); do
|
||||
port=$((UVICORN_BASE_PORT + w))
|
||||
SERVICE_NAMES+=("uvicorn_$port")
|
||||
SERVICE_COMMANDS+=("uvicorn api.app:app --host 127.0.0.1 --port $port")
|
||||
done
|
||||
|
||||
# Add ARQ workers dynamically
|
||||
for ((i=1; i<=ARQ_WORKERS; i++)); do
|
||||
SERVICE_NAMES+=("arq$i")
|
||||
|
|
@ -124,100 +102,19 @@ else
|
|||
echo "Continuing without virtual environment activation..."
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
### 4) Stop old services
|
||||
###############################################################################
|
||||
|
||||
mkdir -p "$RUN_DIR"
|
||||
|
||||
# Function to get all descendant PIDs of a process (children, grandchildren, etc.)
|
||||
get_descendants() {
|
||||
local parent_pid=$1
|
||||
local descendants=""
|
||||
local children
|
||||
|
||||
# Get direct children
|
||||
children=$(pgrep -P "$parent_pid" 2>/dev/null || true)
|
||||
|
||||
for child in $children; do
|
||||
# Recursively get descendants of each child
|
||||
descendants="$descendants $child $(get_descendants "$child")"
|
||||
done
|
||||
|
||||
echo "$descendants"
|
||||
}
|
||||
|
||||
# Function to kill a process and all its descendants
|
||||
kill_process_tree() {
|
||||
local pid=$1
|
||||
local signal=$2
|
||||
local descendants
|
||||
|
||||
descendants=$(get_descendants "$pid")
|
||||
|
||||
# Kill children first (bottom-up), then parent
|
||||
for desc_pid in $descendants; do
|
||||
if kill -0 "$desc_pid" 2>/dev/null; then
|
||||
kill "$signal" "$desc_pid" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
|
||||
# Kill the parent
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
kill "$signal" "$pid" 2>/dev/null || true
|
||||
fi
|
||||
}
|
||||
|
||||
for name in "${SERVICE_NAMES[@]}"; do
|
||||
pidfile="$RUN_DIR/$name.pid"
|
||||
|
||||
if [[ -f $pidfile ]]; then
|
||||
oldpid=$(<"$pidfile")
|
||||
|
||||
if kill -0 "$oldpid" 2>/dev/null; then
|
||||
echo "Stopping $name (PID $oldpid and all descendants)…"
|
||||
|
||||
# Kill the entire process tree (parent + all descendants)
|
||||
kill_process_tree "$oldpid" "-TERM"
|
||||
sleep 4
|
||||
|
||||
# Check if parent or any descendants are still alive
|
||||
still_alive=false
|
||||
if kill -0 "$oldpid" 2>/dev/null; then
|
||||
still_alive=true
|
||||
else
|
||||
for desc_pid in $(get_descendants "$oldpid"); do
|
||||
if kill -0 "$desc_pid" 2>/dev/null; then
|
||||
still_alive=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if $still_alive; then
|
||||
echo "⚠️ $name did not exit cleanly, forcing stop..."
|
||||
kill_process_tree "$oldpid" "-KILL"
|
||||
sleep 1
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f "$pidfile"
|
||||
else
|
||||
echo "No PID file for $name, skipping stop."
|
||||
fi
|
||||
done
|
||||
|
||||
# Clean up any port tracking files for uvicorn
|
||||
rm -f "$RUN_DIR/uvicorn.port" "$RUN_DIR/uvicorn_new.port" "$RUN_DIR/uvicorn_old.pid"
|
||||
NGINX_UPSTREAM_TEMPLATE="$BASE_DIR/nginx/dograh_upstream.conf.template"
|
||||
NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_upstream.conf"
|
||||
|
||||
###############################################################################
|
||||
### 5) Run migrations
|
||||
### 4) Run migrations
|
||||
###############################################################################
|
||||
|
||||
alembic -c "$BASE_DIR/api/alembic.ini" upgrade head
|
||||
|
||||
###############################################################################
|
||||
### 6) Prepare logs
|
||||
### 7) Prepare logs
|
||||
###############################################################################
|
||||
|
||||
mkdir -p "$BASE_LOG_DIR" "$LOG_DIR"
|
||||
|
|
@ -232,7 +129,7 @@ echo "Log directory: $LOG_DIR"
|
|||
echo "Latest symlink: $LATEST_LINK -> $TIMESTAMP"
|
||||
|
||||
###############################################################################
|
||||
### 7) Start services
|
||||
### 8) Start services
|
||||
###############################################################################
|
||||
|
||||
for i in "${!SERVICE_NAMES[@]}"; do
|
||||
|
|
@ -255,22 +152,47 @@ for i in "${!SERVICE_NAMES[@]}"; do
|
|||
echo $pid >"$RUN_DIR/$name.pid"
|
||||
echo " Started with PID $pid"
|
||||
|
||||
if [[ "$name" == "uvicorn" ]]; then
|
||||
echo "$FASTAPI_PORT" >"$RUN_DIR/uvicorn.port"
|
||||
fi
|
||||
done
|
||||
|
||||
# Cold start always uses band A (for rolling_update.sh dual-band strategy)
|
||||
echo "A" > "$RUN_DIR/active_band"
|
||||
|
||||
###############################################################################
|
||||
### 8) Summary
|
||||
### 8) Generate nginx upstream config & reload
|
||||
###############################################################################
|
||||
|
||||
if [[ -f "$NGINX_UPSTREAM_TEMPLATE" ]]; then
|
||||
# Build upstream server list from worker ports
|
||||
UPSTREAM_SERVERS=""
|
||||
for ((w=0; w<FASTAPI_WORKERS; w++)); do
|
||||
port=$((UVICORN_BASE_PORT + w))
|
||||
UPSTREAM_SERVERS="${UPSTREAM_SERVERS} server 127.0.0.1:${port};\n"
|
||||
done
|
||||
|
||||
# Generate upstream config from template
|
||||
sed -e "s|{{UVICORN_UPSTREAM_SERVERS}}|${UPSTREAM_SERVERS}|" \
|
||||
"$NGINX_UPSTREAM_TEMPLATE" | sudo tee "$NGINX_UPSTREAM_CONF" > /dev/null
|
||||
|
||||
echo "Generated nginx upstream config with $FASTAPI_WORKERS workers (ports ${UVICORN_BASE_PORT}-$((UVICORN_BASE_PORT + FASTAPI_WORKERS - 1)))"
|
||||
|
||||
# Test and reload nginx
|
||||
if sudo nginx -t 2>/dev/null; then
|
||||
sudo systemctl reload nginx
|
||||
echo "Nginx reloaded successfully"
|
||||
else
|
||||
echo "ERROR: nginx config test failed, not reloading"
|
||||
sudo nginx -t
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
### 9) Summary
|
||||
###############################################################################
|
||||
|
||||
echo
|
||||
echo "──────────────────────────────────────────────────"
|
||||
if $DEV_MODE; then
|
||||
echo "Mode: DEVELOPMENT (auto-reload enabled)"
|
||||
else
|
||||
echo "Mode: PRODUCTION"
|
||||
fi
|
||||
echo "Mode: PRODUCTION"
|
||||
echo ""
|
||||
for name in "${SERVICE_NAMES[@]}"; do
|
||||
pid=$(<"$RUN_DIR/$name.pid")
|
||||
|
|
@ -284,8 +206,3 @@ echo "Logs: tail -f $LOG_DIR/*.log"
|
|||
echo "Rotated logs: ls $LOG_DIR/*.log.*"
|
||||
echo "To stop: ./scripts/stop_services.sh"
|
||||
echo "──────────────────────────────────────────────────"
|
||||
|
||||
# In Docker mode, wait for all background processes to keep container alive
|
||||
if [[ "$WAIT_FOR_PROCESSES" == "true" ]]; then
|
||||
wait
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue