#!/usr/bin/env bash
# rolling_update_uvicorn.sh — Zero-downtime rolling update for uvicorn workers
#
# Flow: start new uvicorn workers on <new_port>, wait until they answer the
# health endpoint, repoint the nginx upstream at them, then gracefully stop
# the old workers. Any failure before the switch kills the new workers and
# leaves the old ones serving.
#
# Usage:   ./rolling_update_uvicorn.sh <new_port>
# Example: ./rolling_update_uvicorn.sh 8001
#
# Reads from $ENV_FILE: ENVIRONMENT (optional, defaults to "staging"),
# FASTAPI_WORKERS and CONDA_ENV_NAME (required), FASTAPI_PORT (optional
# fallback for the old port when run/uvicorn.port does not exist).

set -euo pipefail

# Check if running as root or with sudo
if [[ $EUID -ne 0 ]]; then
    echo "This script must be run as root or with sudo"
    exit 1
fi

### CONFIGURATION #############################################################
ENV_FILE="api/.env"
RUN_DIR="run"
LOG_ROOT="logs"
HEALTH_CHECK_ENDPOINT="/api/v1/health"  # Adjust as needed
MAX_WAIT_SECONDS=310  # Max wait for graceful shutdown (5 minutes + 10 seconds grace)
HEALTH_CHECK_TIMEOUT_SECONDS=5  # Per-request cap so a hung worker cannot stall the probe loop

# Load environment to get ENVIRONMENT variable. Fail with a readable message
# instead of the bare "No such file" abort that `set -e` would produce.
if [[ ! -f "$ENV_FILE" ]]; then
    echo "Environment file not found: $ENV_FILE" >&2
    exit 1
fi
set -a && . "$ENV_FILE" && set +a
ENVIRONMENT="${ENVIRONMENT:-staging}"

# Set nginx upstream config based on environment
if [[ "$ENVIRONMENT" == "production" ]]; then
    NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_production_upstream.conf"
    UPSTREAM_NAME="dograh_production_backend"
    echo "Rolling update for PRODUCTION environment"
else
    NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_staging_upstream.conf"
    UPSTREAM_NAME="dograh_staging_backend"
    echo "Rolling update for STAGING environment"
fi

### FUNCTIONS ##################################################################

# Timestamped loggers. INFO and WARN go to stdout, ERROR goes to stderr.
log_info() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] INFO: $*"
}

log_error() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
}

log_warning() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] WARN: $*"
}

#######################################
# Report whether a TCP port is free.
# Arguments: $1 - port number
# Returns:   0 if nothing is listening on the port, 1 if it is in use
#######################################
check_port_availability() {
    local port=$1
    if lsof -Pi ":${port}" -sTCP:LISTEN -t >/dev/null 2>&1; then
        return 1  # Port is in use
    fi
    return 0  # Port is available
}

#######################################
# Poll the health endpoint on localhost until it answers HTTP 200.
# Globals:   HEALTH_CHECK_ENDPOINT, HEALTH_CHECK_TIMEOUT_SECONDS (read)
# Arguments: $1 - port the new workers listen on
# Returns:   0 once a 200 is seen, 1 after 30 failed attempts
#######################################
wait_for_health_check() {
    local port=$1
    local max_attempts=30
    local attempt=0
    local status_code

    log_info "Waiting for new uvicorn workers to be healthy on port $port..."

    while (( attempt < max_attempts )); do
        # curl exits non-zero while the port is not accepting connections yet;
        # that is an expected state here, so the exit status is ignored and
        # only the reported status code is compared (exactly, not by substring).
        status_code=$(curl -s -o /dev/null -w '%{http_code}' \
            --max-time "$HEALTH_CHECK_TIMEOUT_SECONDS" \
            "http://127.0.0.1:${port}${HEALTH_CHECK_ENDPOINT}" 2>/dev/null) || true
        if [[ "$status_code" == "200" ]]; then
            log_info "Health check passed on port $port"
            return 0
        fi

        attempt=$((attempt + 1))
        log_info "Health check attempt $attempt/$max_attempts..."
        sleep 1
    done

    log_error "Health check failed after $max_attempts attempts"
    return 1
}

#######################################
# Print the PIDs of the currently running uvicorn process group.
# Globals:   RUN_DIR (read)
# Outputs:   whitespace/newline separated PIDs on stdout; an empty line when
#            the PID file is missing, malformed, or the process is gone
#######################################
get_old_uvicorn_pids() {
    local pidfile="$RUN_DIR/uvicorn.pid"
    local pids=""
    local main_pid pgid

    if [[ -f "$pidfile" ]]; then
        # Read the main PID
        main_pid=$(<"$pidfile")
        # Require a positive integer: an empty value makes kill error out, and
        # "0" would make `kill -0` probe this script's own process group.
        if [[ "$main_pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$main_pid" 2>/dev/null; then
            # Get all PIDs in the process group; fall back to the main PID
            # alone when the group cannot be resolved.
            pgid=$(ps -o pgid= -p "$main_pid" 2>/dev/null | tr -d ' ') || pgid=""
            if [[ -n "$pgid" ]]; then
                pids=$(ps -o pid= -g "$pgid" 2>/dev/null) || pids=$main_pid
            else
                pids=$main_pid
            fi
        fi
    fi

    echo "$pids"
}

#######################################
# SIGTERM the old workers, wait for them to drain, SIGKILL any that remain.
# Globals:   MAX_WAIT_SECONDS (read)
# Arguments: $1 - whitespace/newline separated list of PIDs
# Returns:   0 always (force-kill is the last resort, not a failure)
#######################################
graceful_shutdown_old_workers() {
    local old_pids="$1"
    local -a pid_list=()
    local pid start_time all_dead

    # Split on any whitespace; read returns non-zero at end of input.
    read -r -d '' -a pid_list <<<"$old_pids" || true

    if (( ${#pid_list[@]} == 0 )); then
        log_warning "No old uvicorn workers found to shut down"
        return 0
    fi

    log_info "Starting graceful shutdown of old uvicorn workers (PIDs: ${pid_list[*]})"

    # Send SIGTERM to trigger graceful shutdown
    for pid in "${pid_list[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            log_info "Sending SIGTERM to PID $pid"
            # The process may exit between the probe and the signal.
            kill -TERM "$pid" 2>/dev/null || true
        fi
    done

    # Wait for processes to exit gracefully
    start_time=$(date +%s)
    while (( $(date +%s) - start_time < MAX_WAIT_SECONDS )); do
        all_dead=true
        for pid in "${pid_list[@]}"; do
            if kill -0 "$pid" 2>/dev/null; then
                all_dead=false
                break
            fi
        done

        if [[ "$all_dead" == true ]]; then
            log_info "All old workers shut down gracefully"
            return 0
        fi

        log_info "Waiting for workers to complete active requests... ($(( $(date +%s) - start_time ))s elapsed)"
        sleep 5
    done

    # Force kill if still running after timeout
    log_warning "Timeout reached, force killing remaining workers"
    for pid in "${pid_list[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            log_warning "Force killing PID $pid"
            kill -KILL "$pid" 2>/dev/null || true
        fi
    done

    sleep 1
    return 0
}

#######################################
# Write the upstream stanza for one backend port and move it into place.
# NOTE(review): the stanza text was lost from this file (both here-documents
# were stripped); the single-server form below is a reconstruction — confirm
# it against the deployed upstream file before relying on it.
# Globals:   NGINX_UPSTREAM_CONF, UPSTREAM_NAME (read)
# Arguments: $1 - backend port
# Returns:   0 on success, non-zero if the file could not be written or moved
#######################################
write_nginx_upstream_conf() {
    local port=$1
    local tmp_conf="${NGINX_UPSTREAM_CONF}.tmp"

    printf 'upstream %s {\n    server 127.0.0.1:%s;\n}\n' \
        "$UPSTREAM_NAME" "$port" > "$tmp_conf" || return 1

    # Move the temp file into place (with sudo if the directory is not writable)
    if [[ -w "$(dirname "$NGINX_UPSTREAM_CONF")" ]]; then
        mv "$tmp_conf" "$NGINX_UPSTREAM_CONF"
    else
        sudo mv "$tmp_conf" "$NGINX_UPSTREAM_CONF" 2>/dev/null
    fi
}

#######################################
# Point the nginx upstream at new_port, validate the config, reload nginx.
# If validation fails and old_port is known, the old upstream is restored.
# Globals:   ENVIRONMENT, NEW_PORT (read)
# Arguments: $1 - port to route traffic to
#            $2 - previous port, used for restore on failure (may be empty)
# Returns:   0 if nginx was reloaded with the new upstream, 1 otherwise
#######################################
update_nginx_upstream() {
    local new_port=$1
    local old_port=${2:-}

    log_info "Updating nginx upstream configuration for $ENVIRONMENT..."

    # Create or update the upstream configuration
    if ! write_nginx_upstream_conf "$new_port"; then
        log_error "Could not update nginx config (need sudo). Run: sudo $0 ${NEW_PORT:-$new_port}"
        return 1
    fi

    # Test nginx configuration (with sudo if needed)
    if nginx -t 2>/dev/null || sudo nginx -t 2>/dev/null; then
        log_info "Nginx configuration test passed"

        # Reload nginx to pick up new configuration (with sudo if needed)
        if nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null; then
            log_info "Nginx reloaded successfully"
        else
            log_error "Could not reload nginx"
            return 1
        fi
    else
        log_error "Nginx configuration test failed, rolling back"

        # Restore old configuration if possible. Best effort: this path is
        # already reporting failure, so errors here are deliberately ignored.
        if [[ -n "$old_port" ]]; then
            write_nginx_upstream_conf "$old_port" || true
            nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null || true
        fi
        return 1
    fi
}

#######################################
# Launch uvicorn on new_port in its own session and record its PID and port.
# Globals:   ENV_FILE, RUN_DIR, LOG_ROOT (read); LOG_FILE_PATH,
#            LOG_ROTATION_SIZE, LOG_RETENTION, LOG_COMPRESSION (exported)
# Arguments: $1 - port for the new workers
# Outputs:   writes $RUN_DIR/uvicorn_new.pid and $RUN_DIR/uvicorn_new.port
# Returns:   0 if the process is still alive after 5 seconds, 1 otherwise
#######################################
start_new_uvicorn_workers() {
    local new_port=$1
    local log_dir script_pid timestamp new_pid user_home

    log_info "Starting new uvicorn workers on port $new_port..."

    # Get configuration from environment
    set -a && . "$ENV_FILE" && set +a

    if [[ -z "${FASTAPI_WORKERS:-}" ]]; then
        log_error "FASTAPI_WORKERS environment variable is not set"
        return 1
    fi

    if [[ -z "${CONDA_ENV_NAME:-}" ]]; then
        log_error "CONDA_ENV_NAME environment variable is not set"
        return 1
    fi

    # Source conda if not already available
    if ! command -v conda &>/dev/null; then
        source /opt/conda/etc/profile.d/conda.sh
    fi
    eval "$(conda shell.bash hook)"
    conda activate "$CONDA_ENV_NAME"

    # Use the latest log directory (where start_services.sh put logs)
    # Resolve the symlink to get the actual directory
    log_dir="$LOG_ROOT/latest"
    if [[ -L "$log_dir" ]]; then
        log_dir=$(readlink -f "$log_dir")
    fi

    if [[ ! -d "$log_dir" ]]; then
        log_error "No latest log directory found. Run start_services.sh first."
        return 1
    fi

    # Export rotation settings
    export LOG_ROTATION_SIZE="${LOG_ROTATION_SIZE:-100 MB}"
    export LOG_RETENTION="${LOG_RETENTION:-7 days}"
    export LOG_COMPRESSION="${LOG_COMPRESSION:-gz}"

    # Create unique log filename using timestamp and script PID to avoid conflicts
    script_pid=$$  # PID of this rolling_update script (for uniqueness)
    timestamp=$(date '+%H%M%S')
    export LOG_FILE_PATH="$log_dir/uvicorn-rollover-${timestamp}-${script_pid}.log"

    log_info "Starting uvicorn with $FASTAPI_WORKERS workers on port $new_port"
    log_info "Logs: $LOG_FILE_PATH"

    # A PID file left behind by an earlier failed run must not be mistaken
    # for the process started below.
    rm -f "$RUN_DIR/uvicorn_new.pid"

    # If running as root, switch to original user for uvicorn process
    if [[ $EUID -eq 0 ]] && [[ -n "${SUDO_USER:-}" ]]; then
        log_info "Starting uvicorn as user: $SUDO_USER (not root)"
        user_home=$(getent passwd "$SUDO_USER" | cut -d: -f6)

        # Run uvicorn as the original user, similar to start_services.sh
        # Using setsid and passing LOG_FILE_PATH for loguru to pick up
        sudo -u "$SUDO_USER" bash -c "
            cd '$PWD'
            export HOME='$user_home'
            export LOG_FILE_PATH='$LOG_FILE_PATH'
            export LOG_ROTATION_SIZE='$LOG_ROTATION_SIZE'
            export LOG_RETENTION='$LOG_RETENTION'
            export LOG_COMPRESSION='$LOG_COMPRESSION'
            set -a && source '$ENV_FILE' && set +a
            source /opt/conda/etc/profile.d/conda.sh
            conda activate '$CONDA_ENV_NAME'
            setsid nohup bash -c \"LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS\" >/dev/null 2>&1 &
            echo \$! > '$RUN_DIR/uvicorn_new.pid'
        "

        # Read the PID that was written by the inner shell
        if [[ ! -s "$RUN_DIR/uvicorn_new.pid" ]]; then
            log_error "New uvicorn process died immediately"
            return 1
        fi
        new_pid=$(<"$RUN_DIR/uvicorn_new.pid")
    else
        # Start in new process group with setsid (same as start_services.sh)
        # Each service gets its own LOG_FILE_PATH environment variable
        setsid nohup bash -c "LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS" >/dev/null 2>&1 &
        new_pid=$!
        echo "$new_pid" > "$RUN_DIR/uvicorn_new.pid"
    fi

    # Save port information
    echo "$new_port" > "$RUN_DIR/uvicorn_new.port"

    log_info "New uvicorn started with PID $new_pid"

    # Wait a bit for startup
    sleep 5

    # Check if process is still running
    if ! kill -0 "$new_pid" 2>/dev/null; then
        log_error "New uvicorn process died immediately"
        return 1
    fi

    return 0
}

#######################################
# Promote the "new" PID/port files to the canonical ones.
# Globals:   RUN_DIR (read)
#######################################
finalize_rollover() {
    log_info "Finalizing rollover..."

    # Move new PID file to main PID file
    if [[ -f "$RUN_DIR/uvicorn_new.pid" ]]; then
        mv "$RUN_DIR/uvicorn_new.pid" "$RUN_DIR/uvicorn.pid"
    fi

    # Store the new port for reference
    if [[ -f "$RUN_DIR/uvicorn_new.port" ]]; then
        mv "$RUN_DIR/uvicorn_new.port" "$RUN_DIR/uvicorn.port"
    fi

    # Clean up old PID file if it exists
    rm -f "$RUN_DIR/uvicorn_old.pid"

    log_info "Rollover completed successfully"
}

#######################################
# Undo a failed rollout: kill the new workers, drop their state files and
# point nginx back at the old port when it is known.
# Globals:   RUN_DIR (read)
# Arguments: $1 - old port (may be empty)
#            $2 - PID of the new uvicorn session leader (may be empty)
#######################################
rollback() {
    local old_port=$1
    local new_pid=$2

    log_error "Rolling back due to failure..."

    # Kill new workers if they exist. They never received production traffic,
    # so SIGKILL is used directly. The whole process group is tried first
    # ("--" keeps the negative PID from being parsed as an option), then the
    # single PID.
    if [[ -n "$new_pid" ]] && kill -0 "$new_pid" 2>/dev/null; then
        log_info "Killing new uvicorn workers (PID: $new_pid)"
        kill -KILL -- "-$new_pid" 2>/dev/null || kill -KILL "$new_pid" 2>/dev/null || true
    fi

    # Clean up temporary files
    rm -f "$RUN_DIR/uvicorn_new.pid" "$RUN_DIR/uvicorn_new.port"

    # Restore nginx configuration if old port is known. A failure here is
    # already logged by update_nginx_upstream and must not abort the rollback.
    if [[ -n "$old_port" ]]; then
        update_nginx_upstream "$old_port" "" || true
    fi

    log_error "Rollback completed"
}

### MAIN LOGIC ################################################################

# Check arguments
if [[ $# -ne 1 ]]; then
    echo "Usage: $0 <new_port>"
    echo "Example: $0 8001"
    exit 1
fi

NEW_PORT=$1

# Check nginx permissions early and exit if we can't update nginx
if [[ ! -w "$(dirname "$NGINX_UPSTREAM_CONF")" ]] && [[ $EUID -ne 0 ]]; then
    if ! sudo -n true 2>/dev/null; then
        log_error "This script needs sudo access to update nginx configuration"
        log_error "Cannot proceed without nginx update permissions"
        echo ""
        echo "Please run with sudo:"
        echo "  sudo $0 $NEW_PORT"
        echo ""
        exit 1
    fi
fi

# Validate port number. 10# forces base 10 so a value such as "08" is not
# rejected as an invalid octal literal by the arithmetic evaluation.
if ! [[ "$NEW_PORT" =~ ^[0-9]+$ ]] || (( 10#$NEW_PORT < 1 || 10#$NEW_PORT > 65535 )); then
    log_error "Invalid port number: $NEW_PORT"
    exit 1
fi

# Check if port is available
if ! check_port_availability "$NEW_PORT"; then
    log_error "Port $NEW_PORT is already in use"
    exit 1
fi

# Get old port from file or environment
OLD_PORT=""
if [[ -f "$RUN_DIR/uvicorn.port" ]]; then
    OLD_PORT=$(<"$RUN_DIR/uvicorn.port")
elif [[ -f "$ENV_FILE" ]]; then
    set -a && . "$ENV_FILE" && set +a
    OLD_PORT="${FASTAPI_PORT:-}"
fi

if [[ "$NEW_PORT" == "$OLD_PORT" ]]; then
    log_error "New port is the same as old port ($NEW_PORT)"
    exit 1
fi

log_info "Starting rolling update from port ${OLD_PORT:-unknown} to port $NEW_PORT"

# Create run directory if it doesn't exist
mkdir -p "$RUN_DIR"

# Get old uvicorn PIDs before starting new ones
OLD_PIDS=$(get_old_uvicorn_pids)
if [[ -n "$OLD_PIDS" ]]; then
    # Save old PIDs for potential rollback
    echo "$OLD_PIDS" > "$RUN_DIR/uvicorn_old.pid"
    log_info "Found old uvicorn workers: $(echo $OLD_PIDS | tr '\n' ' ')"
else
    log_warning "No existing uvicorn workers found"
fi

# Start new uvicorn workers
if ! start_new_uvicorn_workers "$NEW_PORT"; then
    log_error "Failed to start new uvicorn workers"
    # Do not leave state files for a process that never came up.
    rm -f "$RUN_DIR/uvicorn_new.pid" "$RUN_DIR/uvicorn_new.port"
    exit 1
fi

NEW_PID=$(<"$RUN_DIR/uvicorn_new.pid")

# Wait for new workers to be healthy
if ! wait_for_health_check "$NEW_PORT"; then
    log_error "New workers failed health check"
    rollback "$OLD_PORT" "$NEW_PID"
    exit 1
fi

# Update nginx to point to new workers
if ! update_nginx_upstream "$NEW_PORT" "$OLD_PORT"; then
    log_error "Failed to update nginx configuration"
    rollback "$OLD_PORT" "$NEW_PID"
    exit 1
fi

# Give nginx some time to start routing to new workers
log_info "Waiting for nginx to stabilize..."
sleep 5

# Gracefully shutdown old workers
if [[ -n "$OLD_PIDS" ]]; then
    graceful_shutdown_old_workers "$OLD_PIDS"
fi

# Finalize the rollover
finalize_rollover

# Summary
echo "──────────────────────────────────────────────────"
echo "✓ Rolling update completed successfully"
echo "  Old port: ${OLD_PORT:-none}"
echo "  New port: $NEW_PORT"
echo "  New PID: $NEW_PID"
echo "  Logs: $LOG_ROOT/latest/"
echo "──────────────────────────────────────────────────"