From 39dade175508e03c14bcbb16ecd9e068b96a5303 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 26 Sep 2025 10:57:51 +0530 Subject: [PATCH] Remove axiom and fix scripts --- api/logging_config.py | 128 ++-------------------- api/requirements.txt | 1 - scripts/rolling_update_uvicorn.sh | 176 +++--------------------------- scripts/start_services.sh | 106 +++--------------- 4 files changed, 43 insertions(+), 368 deletions(-) diff --git a/api/logging_config.py b/api/logging_config.py index de68fb3..a8ea420 100644 --- a/api/logging_config.py +++ b/api/logging_config.py @@ -1,73 +1,21 @@ -import atexit -import logging import os -import queue import sys -from logging.handlers import QueueHandler, QueueListener import loguru -from axiom_py import Client -from axiom_py.logging import AxiomHandler from pipecat.utils.context import run_id_var, turn_var from api.enums import Environment from api.utils.worker import get_worker_id, is_worker_process -# ----- NEW CODE START ----- -# Helper to map string log level to Python logging level, adding support for "TRACE" -TRACE_LEVEL_NUM = 5 # Below DEBUG (10) - - -def _get_logging_level(level_name: str) -> int: - """Return numeric logging level for a given level name. - - Supports the standard logging levels as well as the custom ``TRACE`` level - used by *loguru*. If ``TRACE`` is requested and not yet defined in the - ``logging`` module, it will be registered dynamically. - """ - level_name = level_name.upper() - - # Standard levels are present on the ``logging`` module. - if hasattr(logging, level_name): - return getattr(logging, level_name) - - # Add support for TRACE (finer-grained than DEBUG) - if level_name == "TRACE": - if not hasattr(logging, "TRACE"): - logging.addLevelName(TRACE_LEVEL_NUM, "TRACE") - - def trace(self, message, *args, **kwargs): # type: ignore[override] - if self.isEnabledFor(TRACE_LEVEL_NUM): - self._log(TRACE_LEVEL_NUM, message, args, **kwargs) - - logging.Logger.trace = trace # type: ignore[attr-defined] - return TRACE_LEVEL_NUM - - # Fallback to DEBUG if an unknown level is provided - return logging.DEBUG - - -# ----- NEW CODE END ----- - ENVIRONMENT = os.getenv("ENVIRONMENT", Environment.LOCAL.value) ENABLE_TURN_LOGGING = os.getenv("ENABLE_TURN_LOGGING", "false").lower() == "true" -# Log rotation settings from environment -LOG_ROTATION_SIZE = os.getenv("LOG_ROTATION_SIZE", "100 MB") # e.g., "100 MB", "1 GB" -LOG_ROTATION_TIME = os.getenv("LOG_ROTATION_TIME", None) # e.g., "00:00", "12:00" -LOG_RETENTION = os.getenv( - "LOG_RETENTION", "7 days" -) # e.g., "7 days", "1 week", "10 files" -LOG_COMPRESSION = os.getenv( - "LOG_COMPRESSION", "gz" -) # "gz", "bz2", "xz", "tar", "tar.gz", "tar.bz2", "tar.xz", "zip" -LOG_FILE_PATH = os.getenv( - "LOG_FILE_PATH", None -) # If set, write to file instead of stdout +# We write different uvicorn forked worker log to a different +# file which is then synced to cloudwatch logs +LOG_FILE_PATH = os.getenv("LOG_FILE_PATH", None) # Track if logging has been initialized _logging_initialized = False -_axiom_listener = None def inject_run_id(record): @@ -97,11 +45,11 @@ def inject_run_id(record): def setup_logging(): """Set up logging for the main application""" - global _logging_initialized, _axiom_listener + global _logging_initialized # Return early if already initialized if _logging_initialized: - return _axiom_listener + return log_level = os.getenv("LOG_LEVEL", "DEBUG").upper() @@ -125,31 +73,8 @@ def setup_logging(): else: log_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | [run_id={extra[run_id]}] | {file.name}:{line} | {message}" - # Add handler - either file with rotation or console + # Add handler - either file or console if LOG_FILE_PATH: - # File handler with rotation - rotation_config = {} - - # Size-based rotation (e.g., "100 MB", "1 GB") - if LOG_ROTATION_SIZE: - rotation_config["rotation"] = LOG_ROTATION_SIZE - - # Time-based rotation (e.g., "00:00" for daily at midnight) - if LOG_ROTATION_TIME: - rotation_config["rotation"] = LOG_ROTATION_TIME - - # If no rotation specified, default to 100 MB - if not rotation_config: - rotation_config["rotation"] = "100 MB" - - # Retention policy (e.g., "7 days", "10 files") - if LOG_RETENTION: - rotation_config["retention"] = LOG_RETENTION - - # Compression format - if LOG_COMPRESSION and LOG_COMPRESSION.lower() != "none": - rotation_config["compression"] = LOG_COMPRESSION - # Determine the actual log file path actual_log_path = LOG_FILE_PATH @@ -159,7 +84,6 @@ def setup_logging(): # Split the path to insert worker ID before extension base_path, ext = os.path.splitext(LOG_FILE_PATH) actual_log_path = f"{base_path}-worker-{worker_id}{ext}" - loguru.logger.info(f"Worker {worker_id} will log to: {actual_log_path}") patched.add( actual_log_path, @@ -167,7 +91,6 @@ def setup_logging(): level=log_level, colorize=False, # No colors in file logs enqueue=True, # Thread-safe writing - **rotation_config, ) else: # Console handler (existing behavior) @@ -178,40 +101,5 @@ def setup_logging(): colorize=True, ) - # Set up queue-based logging for Axiom - log_q = queue.Queue(-1) # infinite size (tweak if needed) - queue_handler = QueueHandler(log_q) # puts LogRecord on the queue - queue_handler.setLevel(_get_logging_level(log_level)) - - # Set up Axiom handler if credentials are available - axiom_token = os.environ.get("AXIOM_TOKEN") - axiom_org = os.environ.get("AXIOM_ORG") - axiom_dataset = os.getenv("AXIOM_LOG_DATASET") - - if axiom_token and axiom_org and axiom_dataset: - client = Client(token=axiom_token, org_id=axiom_org) - axiom_handler = AxiomHandler(client, axiom_dataset) - axiom_handler.setLevel(_get_logging_level(log_level)) - - listener = QueueListener( - log_q, - axiom_handler, - respect_handler_level=True, - ) - listener.start() - - patched.add(queue_handler, level=log_level, enqueue=False) - - # Register cleanup - atexit.register(listener.stop) - - # Return the listener for manual cleanup if needed - loguru.logger = patched - _logging_initialized = True - _axiom_listener = listener - return listener - else: - # No Axiom logging available - loguru.logger = patched - _logging_initialized = True - return None + loguru.logger = patched + _logging_initialized = True diff --git a/api/requirements.txt b/api/requirements.txt index 4e3644c..d92d3e1 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -7,7 +7,6 @@ redis==5.3.1 uvicorn==0.35.0 aioboto3==15.1.0 arq==0.26.3 -axiom-py==0.9.0 twilio==9.8.0 minio==7.2.16 alembic-postgresql-enum==1.8.0 diff --git a/scripts/rolling_update_uvicorn.sh b/scripts/rolling_update_uvicorn.sh index e6b0100..8d0c85b 100755 --- a/scripts/rolling_update_uvicorn.sh +++ b/scripts/rolling_update_uvicorn.sh @@ -1,38 +1,18 @@ #!/usr/bin/env bash # rolling_update_uvicorn.sh — Zero-downtime rolling update for uvicorn workers -# -# Usage: ./rolling_update_uvicorn.sh -# Example: ./rolling_update_uvicorn.sh 8001 -set -euo pipefail - -# Check if running as root or with sudo -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root or with sudo" - exit 1 -fi +set -e # Exit on error ### CONFIGURATION ############################################################# ENV_FILE="api/.env" RUN_DIR="run" LOG_ROOT="logs" +VENV_PATH="/home/ubuntu/dograh_venv" HEALTH_CHECK_ENDPOINT="/api/v1/health" # Adjust as needed MAX_WAIT_SECONDS=310 # Max wait for graceful shutdown (5 minutes + 10 seconds grace) -# Load environment to get ENVIRONMENT variable +# Load environment set -a && . "$ENV_FILE" && set +a -ENVIRONMENT="${ENVIRONMENT:-staging}" - -# Set nginx upstream config based on environment -if [[ "$ENVIRONMENT" == "production" ]]; then - NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_production_upstream.conf" - UPSTREAM_NAME="dograh_production_backend" - echo "Rolling update for PRODUCTION environment" -else - NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_staging_upstream.conf" - UPSTREAM_NAME="dograh_staging_backend" - echo "Rolling update for STAGING environment" -fi ### FUNCTIONS ################################################################## @@ -146,61 +126,6 @@ graceful_shutdown_old_workers() { return 0 } -update_nginx_upstream() { - local new_port=$1 - local old_port=$2 - - log_info "Updating nginx upstream configuration for $ENVIRONMENT..." - - # Create or update the upstream configuration - cat > "${NGINX_UPSTREAM_CONF}.tmp" </dev/null || { - log_error "Could not update nginx config (need sudo). Run: sudo $0 $NEW_PORT" - return 1 - } - fi - - # Test nginx configuration (with sudo if needed) - if nginx -t 2>/dev/null || sudo nginx -t 2>/dev/null; then - log_info "Nginx configuration test passed" - # Reload nginx to pick up new configuration (with sudo if needed) - if nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null; then - log_info "Nginx reloaded successfully" - else - log_error "Could not reload nginx" - return 1 - fi - else - log_error "Nginx configuration test failed, rolling back" - # Restore old configuration if possible - if [[ -n "$old_port" ]]; then - cat > "${NGINX_UPSTREAM_CONF}.tmp" </dev/null || true - fi - nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null || true - fi - return 1 - fi -} - start_new_uvicorn_workers() { local new_port=$1 @@ -214,36 +139,17 @@ start_new_uvicorn_workers() { return 1 fi - if [[ -z "${CONDA_ENV_NAME:-}" ]]; then - log_error "CONDA_ENV_NAME environment variable is not set" - return 1 - fi - - # Source conda if not already available - if ! command -v conda &>/dev/null; then - source /opt/conda/etc/profile.d/conda.sh - fi - eval "$(conda shell.bash hook)" - conda activate "$CONDA_ENV_NAME" + # Activate virtual environment + source ${VENV_PATH}/bin/activate # Use the latest log directory (where start_services.sh put logs) - # Resolve the symlink to get the actual directory local log_dir="$LOG_ROOT/latest" - if [[ -L "$log_dir" ]]; then - # It's a symlink, resolve it - log_dir=$(readlink -f "$log_dir") - fi if [[ ! -d "$log_dir" ]]; then log_error "No latest log directory found. Run start_services.sh first." return 1 fi - # Export rotation settings - export LOG_ROTATION_SIZE="${LOG_ROTATION_SIZE:-100 MB}" - export LOG_RETENTION="${LOG_RETENTION:-7 days}" - export LOG_COMPRESSION="${LOG_COMPRESSION:-gz}" - # Create unique log filename using timestamp and script PID to avoid conflicts local script_pid=$$ # PID of this rolling_update script (for uniqueness) local timestamp=$(date '+%H%M%S') @@ -252,35 +158,12 @@ start_new_uvicorn_workers() { log_info "Starting uvicorn with $FASTAPI_WORKERS workers on port $new_port" log_info "Logs: $LOG_FILE_PATH" - # If running as root, switch to original user for uvicorn process - if [[ $EUID -eq 0 ]] && [[ -n "${SUDO_USER:-}" ]]; then - log_info "Starting uvicorn as user: $SUDO_USER (not root)" - - # Run uvicorn as the original user, similar to start_services.sh - # Using setsid and passing LOG_FILE_PATH for loguru to pick up - sudo -u "$SUDO_USER" bash -c " - cd '$PWD' - export HOME='$(getent passwd $SUDO_USER | cut -d: -f6)' - export LOG_FILE_PATH='$LOG_FILE_PATH' - export LOG_ROTATION_SIZE='$LOG_ROTATION_SIZE' - export LOG_RETENTION='$LOG_RETENTION' - export LOG_COMPRESSION='$LOG_COMPRESSION' - set -a && source '$ENV_FILE' && set +a - source /opt/conda/etc/profile.d/conda.sh - conda activate '$CONDA_ENV_NAME' - setsid nohup bash -c \"LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS\" >/dev/null 2>&1 & - echo \$! > '$RUN_DIR/uvicorn_new.pid' - " - # Read the PID that was written - local new_pid=$(<"$RUN_DIR/uvicorn_new.pid") - else - # Start in new process group with setsid (same as start_services.sh) - # Each service gets its own LOG_FILE_PATH environment variable - setsid nohup bash -c "LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS" >/dev/null 2>&1 & - - local new_pid=$! - echo "$new_pid" > "$RUN_DIR/uvicorn_new.pid" - fi + # Start in new process group with setsid (same as start_services.sh) + # Each service gets its own LOG_FILE_PATH environment variable + setsid nohup bash -c "LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS" >/dev/null 2>&1 & + + local new_pid=$! + echo "$new_pid" > "$RUN_DIR/uvicorn_new.pid" # Save port information echo "$new_port" > "$RUN_DIR/uvicorn_new.port" @@ -321,23 +204,18 @@ finalize_rollover() { rollback() { local old_port=$1 local new_pid=$2 - + log_error "Rolling back due to failure..." - + # Kill new workers if they exist if [[ -n "$new_pid" ]] && kill -0 "$new_pid" 2>/dev/null; then log_info "Killing new uvicorn workers (PID: $new_pid)" kill -KILL -"$new_pid" 2>/dev/null || kill -KILL "$new_pid" 2>/dev/null || true fi - + # Clean up temporary files rm -f "$RUN_DIR/uvicorn_new.pid" "$RUN_DIR/uvicorn_new.port" - - # Restore nginx configuration if old port is known - if [[ -n "$old_port" ]]; then - update_nginx_upstream "$old_port" "" - fi - + log_error "Rollback completed" } @@ -352,19 +230,6 @@ fi NEW_PORT=$1 -# Check nginx permissions early and exit if we can't update nginx -if [[ ! -w $(dirname "$NGINX_UPSTREAM_CONF") ]] && [[ $EUID -ne 0 ]]; then - if ! sudo -n true 2>/dev/null; then - log_error "This script needs sudo access to update nginx configuration" - log_error "Cannot proceed without nginx update permissions" - echo "" - echo "Please run with sudo:" - echo " sudo $0 $NEW_PORT" - echo "" - exit 1 - fi -fi - # Validate port number if ! [[ "$NEW_PORT" =~ ^[0-9]+$ ]] || [ "$NEW_PORT" -lt 1 ] || [ "$NEW_PORT" -gt 65535 ]; then log_error "Invalid port number: $NEW_PORT" @@ -421,15 +286,8 @@ if ! wait_for_health_check "$NEW_PORT"; then exit 1 fi -# Update nginx to point to new workers -if ! update_nginx_upstream "$NEW_PORT" "$OLD_PORT"; then - log_error "Failed to update nginx configuration" - rollback "$OLD_PORT" "$NEW_PID" - exit 1 -fi - -# Give nginx some time to start routing to new workers -log_info "Waiting for nginx to stabilize..." +# Give the system some time to stabilize before shutting down old workers +log_info "Waiting for system to stabilize..." sleep 5 # Gracefully shutdown old workers diff --git a/scripts/start_services.sh b/scripts/start_services.sh index ed36ad1..e02b262 100755 --- a/scripts/start_services.sh +++ b/scripts/start_services.sh @@ -1,18 +1,22 @@ #!/usr/bin/env bash -# restart_services.sh — safer, simplified +# start_services.sh -set -euo pipefail +set -e # Exit on error ### CONFIGURATION ############################################################# ENV_FILE="api/.env" RUN_DIR="run" # where we keep *.pid LOG_ROOT="logs" +VENV_PATH="/home/ubuntu/dograh_venv" +ARQ_WORKERS=${ARQ_WORKERS:-1} -### 1) Load environment vars so that configurations like FASTAPI_WORKERS are loaded # +# Log startup +echo "Starting Dograh Services at $(date)" + +### 1) Load environment vars so that configurations like FASTAPI_WORKERS are loaded set -a && . "$ENV_FILE" && set +a -# Get ENVIRONMENT for nginx config selection -ENVIRONMENT="${ENVIRONMENT:-staging}" +cd /home/ubuntu/app if [[ -z "${FASTAPI_PORT:-}" ]]; then echo "Error: FASTAPI_PORT environment variable is not set." @@ -24,14 +28,6 @@ if [[ -z "${FASTAPI_WORKERS:-}" ]]; then exit 1 fi -if [[ -z "${CONDA_ENV_NAME:-}" ]]; then - echo "Error: CONDA_ENV_NAME environment variable is not set." - exit 1 -fi - -# Default ARQ_WORKERS to 1 if not set -ARQ_WORKERS=${ARQ_WORKERS:-1} - # map "service name" → "command to run" declare -A SERVICES=( [ari_manager]="python -m api.services.telephony.ari_manager" @@ -44,13 +40,8 @@ for ((i=1; i<=ARQ_WORKERS; i++)); do SERVICES[arq$i]="python -m arq api.tasks.arq.WorkerSettings --custom-log-dict api.tasks.arq.LOG_CONFIG" done -### 2) Activate conda ######################################################### -# Source conda if not already available (needed when running from systemd) -if ! command -v conda &>/dev/null; then - source /opt/conda/etc/profile.d/conda.sh -fi -eval "$(conda shell.bash hook)" -conda activate "$CONDA_ENV_NAME" +### 2) Activate virtual environment ######################################### +source ${VENV_PATH}/bin/activate ### 3) Stop old services (only via PID files) ################################# mkdir -p "$RUN_DIR" @@ -58,16 +49,16 @@ for name in "${!SERVICES[@]}"; do pidfile="$RUN_DIR/$name.pid" if [[ -f $pidfile ]]; then oldpid=$(<"$pidfile") - if kill -0 "$oldpid" 2>/dev/null; then + if kill -0 "$oldpid"; then echo "Stopping $name (PID $oldpid and its process group)…" # Kill the entire process group (negative PID) # First try SIGTERM - kill -TERM -"$oldpid" 2>/dev/null || kill -TERM "$oldpid" 2>/dev/null || true + kill -TERM -"$oldpid" || kill -TERM "$oldpid" || true sleep 4 # If still running, use SIGKILL - if kill -0 "$oldpid" 2>/dev/null; then + if kill -0 "$oldpid"; then echo "⚠️ $name did not exit cleanly, forcing stop..." - kill -KILL -"$oldpid" 2>/dev/null || kill -KILL "$oldpid" 2>/dev/null || true + kill -KILL -"$oldpid" || kill -KILL "$oldpid" || true sleep 1 fi fi @@ -80,32 +71,17 @@ done # Clean up any port tracking files for uvicorn rm -f "$RUN_DIR/uvicorn.port" "$RUN_DIR/uvicorn_new.port" "$RUN_DIR/uvicorn_old.pid" - ### 4) Run migrations ######################################################### alembic -c api/alembic.ini upgrade head ### 5) Prepare logs ########################################################### -timestamp=$(date '+%Y-%m-%d_%H-%M-%S') -LOG_DIR="$LOG_ROOT/$timestamp" +LOG_DIR="$LOG_ROOT/latest" mkdir -p "$LOG_DIR" -# Create relative symlink -cd "$LOG_ROOT" && ln -sfn "$timestamp" latest && cd - >/dev/null - -### 6) (Optional) Free FastAPI port ########################################### -FASTAPI_PORT=$FASTAPI_PORT -if command -v lsof &>/dev/null; then - lsof -ti tcp:"$FASTAPI_PORT" | xargs -r kill -9 || true -fi ### 7) Start services ######################################################### -# Export rotation settings for loguru (if using file logging) -export LOG_ROTATION_SIZE="${LOG_ROTATION_SIZE:-100 MB}" -export LOG_RETENTION="${LOG_RETENTION:-7 days}" -export LOG_COMPRESSION="${LOG_COMPRESSION:-gz}" - for name in "${!SERVICES[@]}"; do cmd=${SERVICES[$name]} - echo "→ Starting $name with loguru rotation…" + echo "→ Starting $name" # Export LOG_FILE_PATH for this specific service export LOG_FILE_PATH="$LOG_DIR/$name.log" @@ -118,55 +94,9 @@ for name in "${!SERVICES[@]}"; do pid=$! echo $pid >"$RUN_DIR/$name.pid" - # For uvicorn, also save the port for rolling updates and update nginx + # For uvicorn, also save the port for rolling updates if [[ "$name" == "uvicorn" ]]; then echo "$FASTAPI_PORT" >"$RUN_DIR/uvicorn.port" - - # Update nginx upstream configuration if nginx is installed - if command -v nginx &>/dev/null && [[ -d /etc/nginx ]]; then - # Determine which upstream config to update based on ENVIRONMENT - if [[ "${ENVIRONMENT:-}" == "production" ]]; then - NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_production_upstream.conf" - UPSTREAM_NAME="dograh_production_backend" - echo "→ Updating PRODUCTION nginx upstream to port $FASTAPI_PORT…" - else - # Default to staging for any non-production environment - NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_staging_upstream.conf" - UPSTREAM_NAME="dograh_staging_backend" - echo "→ Updating STAGING nginx upstream to port $FASTAPI_PORT…" - fi - - if [[ -w $(dirname "$NGINX_UPSTREAM_CONF") ]] || [[ $EUID -eq 0 ]]; then - cat > "${NGINX_UPSTREAM_CONF}.tmp" </dev/null || \ - echo "⚠️ Could not update nginx config (need sudo). Run: sudo $0" - fi - - # Test and reload nginx if config was updated - if [[ -f "$NGINX_UPSTREAM_CONF" ]]; then - if nginx -t 2>/dev/null || sudo nginx -t 2>/dev/null; then - echo "→ Reloading nginx…" - nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null || \ - echo "⚠️ Could not reload nginx (may need sudo)" - else - echo "⚠️ Nginx configuration test failed" - fi - fi - else - echo "⚠️ Cannot write to nginx config directory (need sudo privileges)" - echo " Run: sudo $0 to update nginx configuration" - fi - fi fi done disown -a