Remove axiom and fix scripts

This commit is contained in:
Abhishek Kumar 2025-09-26 10:57:51 +05:30
parent 443490b2dd
commit 39dade1755
4 changed files with 43 additions and 368 deletions

View file

@ -1,73 +1,21 @@
import atexit
import logging
import os
import queue
import sys
from logging.handlers import QueueHandler, QueueListener
import loguru
from axiom_py import Client
from axiom_py.logging import AxiomHandler
from pipecat.utils.context import run_id_var, turn_var
from api.enums import Environment
from api.utils.worker import get_worker_id, is_worker_process
# ----- NEW CODE START -----
# Helper to map string log level to Python logging level, adding support for "TRACE"
TRACE_LEVEL_NUM = 5  # Below DEBUG (10)


def _get_logging_level(level_name: str) -> int:
    """Return the numeric ``logging`` level for *level_name*.

    The lookup is case-insensitive. Standard level names exposed as integer
    attributes of the ``logging`` module (``DEBUG``, ``INFO``, ``WARNING``,
    ``WARN``, ``ERROR``, ``CRITICAL``, ``FATAL``, ``NOTSET``) are returned
    as-is. ``TRACE`` (used by *loguru*) is mapped to ``TRACE_LEVEL_NUM`` and,
    on first use, registered with ``logging`` together with a
    ``Logger.trace`` convenience method.

    Args:
        level_name: Name of the level, e.g. ``"info"`` or ``"TRACE"``.

    Returns:
        The integer level. Unknown names fall back to ``logging.DEBUG``.
    """
    level_name = level_name.upper()

    # Only accept attributes that are real integer levels. The ``logging``
    # module also exposes non-level upper-case attributes (for example
    # ``BASIC_FORMAT``, a string), which must not be returned as a level.
    level = getattr(logging, level_name, None)
    if isinstance(level, int) and not isinstance(level, bool):
        return level

    # Add support for TRACE (finer-grained than DEBUG)
    if level_name == "TRACE":
        # Register the level name once; getLevelName() returns "Level 5"
        # until a name has been registered for that number.
        if logging.getLevelName(TRACE_LEVEL_NUM) != "TRACE":
            logging.addLevelName(TRACE_LEVEL_NUM, "TRACE")

        # Install Logger.trace only if nothing has provided one yet, so
        # repeated calls do not keep re-patching the class.
        if not hasattr(logging.Logger, "trace"):

            def trace(self, message, *args, **kwargs):  # type: ignore[override]
                if self.isEnabledFor(TRACE_LEVEL_NUM):
                    self._log(TRACE_LEVEL_NUM, message, args, **kwargs)

            logging.Logger.trace = trace  # type: ignore[attr-defined]
        return TRACE_LEVEL_NUM

    # Fallback to DEBUG if an unknown level is provided
    return logging.DEBUG
# ----- NEW CODE END -----
ENVIRONMENT = os.getenv("ENVIRONMENT", Environment.LOCAL.value)
ENABLE_TURN_LOGGING = os.getenv("ENABLE_TURN_LOGGING", "false").lower() == "true"
# Log rotation settings from environment
LOG_ROTATION_SIZE = os.getenv("LOG_ROTATION_SIZE", "100 MB") # e.g., "100 MB", "1 GB"
LOG_ROTATION_TIME = os.getenv("LOG_ROTATION_TIME", None) # e.g., "00:00", "12:00"
LOG_RETENTION = os.getenv(
"LOG_RETENTION", "7 days"
) # e.g., "7 days", "1 week", "10 files"
LOG_COMPRESSION = os.getenv(
"LOG_COMPRESSION", "gz"
) # "gz", "bz2", "xz", "tar", "tar.gz", "tar.bz2", "tar.xz", "zip"
LOG_FILE_PATH = os.getenv(
"LOG_FILE_PATH", None
) # If set, write to file instead of stdout
# We write different uvicorn forked worker log to a different
# file which is then synced to cloudwatch logs
LOG_FILE_PATH = os.getenv("LOG_FILE_PATH", None)
# Track if logging has been initialized
_logging_initialized = False
_axiom_listener = None
def inject_run_id(record):
@ -97,11 +45,11 @@ def inject_run_id(record):
def setup_logging():
"""Set up logging for the main application"""
global _logging_initialized, _axiom_listener
global _logging_initialized
# Return early if already initialized
if _logging_initialized:
return _axiom_listener
return
log_level = os.getenv("LOG_LEVEL", "DEBUG").upper()
@ -125,31 +73,8 @@ def setup_logging():
else:
log_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | <level>{level}</level> | [run_id={extra[run_id]}] | {file.name}:{line} | {message}"
# Add handler - either file with rotation or console
# Add handler - either file or console
if LOG_FILE_PATH:
# File handler with rotation
rotation_config = {}
# Size-based rotation (e.g., "100 MB", "1 GB")
if LOG_ROTATION_SIZE:
rotation_config["rotation"] = LOG_ROTATION_SIZE
# Time-based rotation (e.g., "00:00" for daily at midnight)
if LOG_ROTATION_TIME:
rotation_config["rotation"] = LOG_ROTATION_TIME
# If no rotation specified, default to 100 MB
if not rotation_config:
rotation_config["rotation"] = "100 MB"
# Retention policy (e.g., "7 days", "10 files")
if LOG_RETENTION:
rotation_config["retention"] = LOG_RETENTION
# Compression format
if LOG_COMPRESSION and LOG_COMPRESSION.lower() != "none":
rotation_config["compression"] = LOG_COMPRESSION
# Determine the actual log file path
actual_log_path = LOG_FILE_PATH
@ -159,7 +84,6 @@ def setup_logging():
# Split the path to insert worker ID before extension
base_path, ext = os.path.splitext(LOG_FILE_PATH)
actual_log_path = f"{base_path}-worker-{worker_id}{ext}"
loguru.logger.info(f"Worker {worker_id} will log to: {actual_log_path}")
patched.add(
actual_log_path,
@ -167,7 +91,6 @@ def setup_logging():
level=log_level,
colorize=False, # No colors in file logs
enqueue=True, # Thread-safe writing
**rotation_config,
)
else:
# Console handler (existing behavior)
@ -178,40 +101,5 @@ def setup_logging():
colorize=True,
)
# Set up queue-based logging for Axiom
log_q = queue.Queue(-1) # infinite size (tweak if needed)
queue_handler = QueueHandler(log_q) # puts LogRecord on the queue
queue_handler.setLevel(_get_logging_level(log_level))
# Set up Axiom handler if credentials are available
axiom_token = os.environ.get("AXIOM_TOKEN")
axiom_org = os.environ.get("AXIOM_ORG")
axiom_dataset = os.getenv("AXIOM_LOG_DATASET")
if axiom_token and axiom_org and axiom_dataset:
client = Client(token=axiom_token, org_id=axiom_org)
axiom_handler = AxiomHandler(client, axiom_dataset)
axiom_handler.setLevel(_get_logging_level(log_level))
listener = QueueListener(
log_q,
axiom_handler,
respect_handler_level=True,
)
listener.start()
patched.add(queue_handler, level=log_level, enqueue=False)
# Register cleanup
atexit.register(listener.stop)
# Return the listener for manual cleanup if needed
loguru.logger = patched
_logging_initialized = True
_axiom_listener = listener
return listener
else:
# No Axiom logging available
loguru.logger = patched
_logging_initialized = True
return None
loguru.logger = patched
_logging_initialized = True

View file

@ -7,7 +7,6 @@ redis==5.3.1
uvicorn==0.35.0
aioboto3==15.1.0
arq==0.26.3
axiom-py==0.9.0
twilio==9.8.0
minio==7.2.16
alembic-postgresql-enum==1.8.0

View file

@ -1,38 +1,18 @@
#!/usr/bin/env bash
# rolling_update_uvicorn.sh — Zero-downtime rolling update for uvicorn workers
#
# Usage: ./rolling_update_uvicorn.sh <NEW_PORT>
# Example: ./rolling_update_uvicorn.sh 8001
set -euo pipefail
# Check if running as root or with sudo
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root or with sudo"
exit 1
fi
set -e # Exit on error
### CONFIGURATION #############################################################
ENV_FILE="api/.env"
RUN_DIR="run"
LOG_ROOT="logs"
VENV_PATH="/home/ubuntu/dograh_venv"
HEALTH_CHECK_ENDPOINT="/api/v1/health" # Adjust as needed
MAX_WAIT_SECONDS=310 # Max wait for graceful shutdown (5 minutes + 10 seconds grace)
# Load environment to get ENVIRONMENT variable
# Load environment
set -a && . "$ENV_FILE" && set +a
ENVIRONMENT="${ENVIRONMENT:-staging}"
# Set nginx upstream config based on environment
if [[ "$ENVIRONMENT" == "production" ]]; then
NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_production_upstream.conf"
UPSTREAM_NAME="dograh_production_backend"
echo "Rolling update for PRODUCTION environment"
else
NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_staging_upstream.conf"
UPSTREAM_NAME="dograh_staging_backend"
echo "Rolling update for STAGING environment"
fi
### FUNCTIONS ##################################################################
@ -146,61 +126,6 @@ graceful_shutdown_old_workers() {
return 0
}
# update_nginx_upstream NEW_PORT OLD_PORT
#
# Rewrites $NGINX_UPSTREAM_CONF so that upstream $UPSTREAM_NAME points at
# 127.0.0.1:NEW_PORT, then runs `nginx -t` and reloads nginx.
# If the config test fails and OLD_PORT is non-empty, the upstream file is
# rewritten for OLD_PORT and a reload is attempted on a best-effort basis.
#
# Reads globals: NGINX_UPSTREAM_CONF, UPSTREAM_NAME, ENVIRONMENT, NEW_PORT.
# Returns 1 if the move, the config test, or the reload fails.
update_nginx_upstream() {
local new_port=$1
local old_port=$2
log_info "Updating nginx upstream configuration for $ENVIRONMENT..."
# Create or update the upstream configuration
# The heredoc delimiter is unquoted, so $ENVIRONMENT, $(date),
# ${UPSTREAM_NAME} and ${new_port} are expanded when the file is written.
cat > "${NGINX_UPSTREAM_CONF}.tmp" <<EOF
# Auto-generated by rolling_update_uvicorn.sh for $ENVIRONMENT
# Last updated: $(date)
upstream ${UPSTREAM_NAME} {
server 127.0.0.1:${new_port} max_fails=3 fail_timeout=30s;
}
EOF
# Atomic move (with sudo if needed)
# The .tmp file sits next to the target, so mv replaces the config in one step.
if [[ $EUID -eq 0 ]]; then
mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}"
else
sudo mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}" 2>/dev/null || {
log_error "Could not update nginx config (need sudo). Run: sudo $0 $NEW_PORT"
return 1
}
fi
# Test nginx configuration (with sudo if needed)
# Try without sudo first; stderr of both attempts is discarded.
if nginx -t 2>/dev/null || sudo nginx -t 2>/dev/null; then
log_info "Nginx configuration test passed"
# Reload nginx to pick up new configuration (with sudo if needed)
if nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null; then
log_info "Nginx reloaded successfully"
else
log_error "Could not reload nginx"
return 1
fi
else
log_error "Nginx configuration test failed, rolling back"
# Restore old configuration if possible
# Every step below ends in `|| true` or is unchecked: the restore is best
# effort and the function still returns 1 afterwards.
if [[ -n "$old_port" ]]; then
cat > "${NGINX_UPSTREAM_CONF}.tmp" <<EOF
upstream ${UPSTREAM_NAME} {
server 127.0.0.1:${old_port} max_fails=3 fail_timeout=30s;
}
EOF
if [[ $EUID -eq 0 ]]; then
mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}"
else
sudo mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}" 2>/dev/null || true
fi
nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null || true
fi
return 1
fi
}
start_new_uvicorn_workers() {
local new_port=$1
@ -214,36 +139,17 @@ start_new_uvicorn_workers() {
return 1
fi
if [[ -z "${CONDA_ENV_NAME:-}" ]]; then
log_error "CONDA_ENV_NAME environment variable is not set"
return 1
fi
# Source conda if not already available
if ! command -v conda &>/dev/null; then
source /opt/conda/etc/profile.d/conda.sh
fi
eval "$(conda shell.bash hook)"
conda activate "$CONDA_ENV_NAME"
# Activate virtual environment
source ${VENV_PATH}/bin/activate
# Use the latest log directory (where start_services.sh put logs)
# Resolve the symlink to get the actual directory
local log_dir="$LOG_ROOT/latest"
if [[ -L "$log_dir" ]]; then
# It's a symlink, resolve it
log_dir=$(readlink -f "$log_dir")
fi
if [[ ! -d "$log_dir" ]]; then
log_error "No latest log directory found. Run start_services.sh first."
return 1
fi
# Export rotation settings
export LOG_ROTATION_SIZE="${LOG_ROTATION_SIZE:-100 MB}"
export LOG_RETENTION="${LOG_RETENTION:-7 days}"
export LOG_COMPRESSION="${LOG_COMPRESSION:-gz}"
# Create unique log filename using timestamp and script PID to avoid conflicts
local script_pid=$$ # PID of this rolling_update script (for uniqueness)
local timestamp=$(date '+%H%M%S')
@ -252,35 +158,12 @@ start_new_uvicorn_workers() {
log_info "Starting uvicorn with $FASTAPI_WORKERS workers on port $new_port"
log_info "Logs: $LOG_FILE_PATH"
# If running as root, switch to original user for uvicorn process
if [[ $EUID -eq 0 ]] && [[ -n "${SUDO_USER:-}" ]]; then
log_info "Starting uvicorn as user: $SUDO_USER (not root)"
# Run uvicorn as the original user, similar to start_services.sh
# Using setsid and passing LOG_FILE_PATH for loguru to pick up
sudo -u "$SUDO_USER" bash -c "
cd '$PWD'
export HOME='$(getent passwd $SUDO_USER | cut -d: -f6)'
export LOG_FILE_PATH='$LOG_FILE_PATH'
export LOG_ROTATION_SIZE='$LOG_ROTATION_SIZE'
export LOG_RETENTION='$LOG_RETENTION'
export LOG_COMPRESSION='$LOG_COMPRESSION'
set -a && source '$ENV_FILE' && set +a
source /opt/conda/etc/profile.d/conda.sh
conda activate '$CONDA_ENV_NAME'
setsid nohup bash -c \"LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS\" >/dev/null 2>&1 &
echo \$! > '$RUN_DIR/uvicorn_new.pid'
"
# Read the PID that was written
local new_pid=$(<"$RUN_DIR/uvicorn_new.pid")
else
# Start in new process group with setsid (same as start_services.sh)
# Each service gets its own LOG_FILE_PATH environment variable
setsid nohup bash -c "LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS" >/dev/null 2>&1 &
local new_pid=$!
echo "$new_pid" > "$RUN_DIR/uvicorn_new.pid"
fi
# Start in new process group with setsid (same as start_services.sh)
# Each service gets its own LOG_FILE_PATH environment variable
setsid nohup bash -c "LOG_FILE_PATH='$LOG_FILE_PATH' uvicorn api.app:app --host 0.0.0.0 --port $new_port --workers $FASTAPI_WORKERS" >/dev/null 2>&1 &
local new_pid=$!
echo "$new_pid" > "$RUN_DIR/uvicorn_new.pid"
# Save port information
echo "$new_port" > "$RUN_DIR/uvicorn_new.port"
@ -321,23 +204,18 @@ finalize_rollover() {
# rollback OLD_PORT NEW_PID
#
# Undoes a failed rollout: force-kills the new uvicorn process, removes the
# uvicorn_new.* tracking files from $RUN_DIR, and re-points nginx at OLD_PORT
# when one is given.
rollback() {
local old_port=$1
local new_pid=$2
log_error "Rolling back due to failure..."
# Kill new workers if they exist
if [[ -n "$new_pid" ]] && kill -0 "$new_pid" 2>/dev/null; then
log_info "Killing new uvicorn workers (PID: $new_pid)"
# Negative PID targets the whole process group; fall back to the single
# PID, and never fail the rollback if both kills fail.
kill -KILL -"$new_pid" 2>/dev/null || kill -KILL "$new_pid" 2>/dev/null || true
fi
# Clean up temporary files
rm -f "$RUN_DIR/uvicorn_new.pid" "$RUN_DIR/uvicorn_new.port"
# Restore nginx configuration if old port is known
# The empty second argument means update_nginx_upstream has no port of its
# own to fall back to.
if [[ -n "$old_port" ]]; then
update_nginx_upstream "$old_port" ""
fi
log_error "Rollback completed"
}
@ -352,19 +230,6 @@ fi
NEW_PORT=$1
# Check nginx permissions early and exit if we can't update nginx
if [[ ! -w $(dirname "$NGINX_UPSTREAM_CONF") ]] && [[ $EUID -ne 0 ]]; then
if ! sudo -n true 2>/dev/null; then
log_error "This script needs sudo access to update nginx configuration"
log_error "Cannot proceed without nginx update permissions"
echo ""
echo "Please run with sudo:"
echo " sudo $0 $NEW_PORT"
echo ""
exit 1
fi
fi
# Validate port number
if ! [[ "$NEW_PORT" =~ ^[0-9]+$ ]] || [ "$NEW_PORT" -lt 1 ] || [ "$NEW_PORT" -gt 65535 ]; then
log_error "Invalid port number: $NEW_PORT"
@ -421,15 +286,8 @@ if ! wait_for_health_check "$NEW_PORT"; then
exit 1
fi
# Update nginx to point to new workers
if ! update_nginx_upstream "$NEW_PORT" "$OLD_PORT"; then
log_error "Failed to update nginx configuration"
rollback "$OLD_PORT" "$NEW_PID"
exit 1
fi
# Give nginx some time to start routing to new workers
log_info "Waiting for nginx to stabilize..."
# Give the system some time to stabilize before shutting down old workers
log_info "Waiting for system to stabilize..."
sleep 5
# Gracefully shutdown old workers

View file

@ -1,18 +1,22 @@
#!/usr/bin/env bash
# restart_services.sh — safer, simplified
# start_services.sh
set -euo pipefail
set -e # Exit on error
### CONFIGURATION #############################################################
ENV_FILE="api/.env"
RUN_DIR="run" # where we keep *.pid
LOG_ROOT="logs"
VENV_PATH="/home/ubuntu/dograh_venv"
ARQ_WORKERS=${ARQ_WORKERS:-1}
### 1) Load environment vars so that configurations like FASTAPI_WORKERS are loaded #
# Log startup
echo "Starting Dograh Services at $(date)"
### 1) Load environment vars so that configurations like FASTAPI_WORKERS are loaded
set -a && . "$ENV_FILE" && set +a
# Get ENVIRONMENT for nginx config selection
ENVIRONMENT="${ENVIRONMENT:-staging}"
cd /home/ubuntu/app
if [[ -z "${FASTAPI_PORT:-}" ]]; then
echo "Error: FASTAPI_PORT environment variable is not set."
@ -24,14 +28,6 @@ if [[ -z "${FASTAPI_WORKERS:-}" ]]; then
exit 1
fi
if [[ -z "${CONDA_ENV_NAME:-}" ]]; then
echo "Error: CONDA_ENV_NAME environment variable is not set."
exit 1
fi
# Default ARQ_WORKERS to 1 if not set
ARQ_WORKERS=${ARQ_WORKERS:-1}
# map "service name" → "command to run"
declare -A SERVICES=(
[ari_manager]="python -m api.services.telephony.ari_manager"
@ -44,13 +40,8 @@ for ((i=1; i<=ARQ_WORKERS; i++)); do
SERVICES[arq$i]="python -m arq api.tasks.arq.WorkerSettings --custom-log-dict api.tasks.arq.LOG_CONFIG"
done
### 2) Activate conda #########################################################
# Source conda if not already available (needed when running from systemd)
if ! command -v conda &>/dev/null; then
source /opt/conda/etc/profile.d/conda.sh
fi
eval "$(conda shell.bash hook)"
conda activate "$CONDA_ENV_NAME"
### 2) Activate virtual environment #########################################
source ${VENV_PATH}/bin/activate
### 3) Stop old services (only via PID files) #################################
mkdir -p "$RUN_DIR"
@ -58,16 +49,16 @@ for name in "${!SERVICES[@]}"; do
pidfile="$RUN_DIR/$name.pid"
if [[ -f $pidfile ]]; then
oldpid=$(<"$pidfile")
if kill -0 "$oldpid" 2>/dev/null; then
if kill -0 "$oldpid"; then
echo "Stopping $name (PID $oldpid and its process group)…"
# Kill the entire process group (negative PID)
# First try SIGTERM
kill -TERM -"$oldpid" 2>/dev/null || kill -TERM "$oldpid" 2>/dev/null || true
kill -TERM -"$oldpid" || kill -TERM "$oldpid" || true
sleep 4
# If still running, use SIGKILL
if kill -0 "$oldpid" 2>/dev/null; then
if kill -0 "$oldpid"; then
echo "⚠️ $name did not exit cleanly, forcing stop..."
kill -KILL -"$oldpid" 2>/dev/null || kill -KILL "$oldpid" 2>/dev/null || true
kill -KILL -"$oldpid" || kill -KILL "$oldpid" || true
sleep 1
fi
fi
@ -80,32 +71,17 @@ done
# Clean up any port tracking files for uvicorn
rm -f "$RUN_DIR/uvicorn.port" "$RUN_DIR/uvicorn_new.port" "$RUN_DIR/uvicorn_old.pid"
### 4) Run migrations #########################################################
alembic -c api/alembic.ini upgrade head
### 5) Prepare logs ###########################################################
timestamp=$(date '+%Y-%m-%d_%H-%M-%S')
LOG_DIR="$LOG_ROOT/$timestamp"
LOG_DIR="$LOG_ROOT/latest"
mkdir -p "$LOG_DIR"
# Create relative symlink
cd "$LOG_ROOT" && ln -sfn "$timestamp" latest && cd - >/dev/null
### 6) (Optional) Free FastAPI port ###########################################
FASTAPI_PORT=$FASTAPI_PORT
if command -v lsof &>/dev/null; then
lsof -ti tcp:"$FASTAPI_PORT" | xargs -r kill -9 || true
fi
### 7) Start services #########################################################
# Export rotation settings for loguru (if using file logging)
export LOG_ROTATION_SIZE="${LOG_ROTATION_SIZE:-100 MB}"
export LOG_RETENTION="${LOG_RETENTION:-7 days}"
export LOG_COMPRESSION="${LOG_COMPRESSION:-gz}"
for name in "${!SERVICES[@]}"; do
cmd=${SERVICES[$name]}
echo "→ Starting $name with loguru rotation…"
echo "→ Starting $name"
# Export LOG_FILE_PATH for this specific service
export LOG_FILE_PATH="$LOG_DIR/$name.log"
@ -118,55 +94,9 @@ for name in "${!SERVICES[@]}"; do
pid=$!
echo $pid >"$RUN_DIR/$name.pid"
# For uvicorn, also save the port for rolling updates and update nginx
# For uvicorn, also save the port for rolling updates
if [[ "$name" == "uvicorn" ]]; then
echo "$FASTAPI_PORT" >"$RUN_DIR/uvicorn.port"
# Update nginx upstream configuration if nginx is installed
if command -v nginx &>/dev/null && [[ -d /etc/nginx ]]; then
# Determine which upstream config to update based on ENVIRONMENT
if [[ "${ENVIRONMENT:-}" == "production" ]]; then
NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_production_upstream.conf"
UPSTREAM_NAME="dograh_production_backend"
echo "→ Updating PRODUCTION nginx upstream to port $FASTAPI_PORT"
else
# Default to staging for any non-production environment
NGINX_UPSTREAM_CONF="/etc/nginx/conf.d/dograh_staging_upstream.conf"
UPSTREAM_NAME="dograh_staging_backend"
echo "→ Updating STAGING nginx upstream to port $FASTAPI_PORT"
fi
if [[ -w $(dirname "$NGINX_UPSTREAM_CONF") ]] || [[ $EUID -eq 0 ]]; then
cat > "${NGINX_UPSTREAM_CONF}.tmp" <<EOF
# Auto-generated by start_services.sh for ${ENVIRONMENT:-staging}
# Last updated: $(date)
upstream ${UPSTREAM_NAME} {
server 127.0.0.1:${FASTAPI_PORT} max_fails=3 fail_timeout=30s;
}
EOF
# Atomic move (may need sudo)
if [[ $EUID -eq 0 ]]; then
mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}"
else
sudo mv "${NGINX_UPSTREAM_CONF}.tmp" "${NGINX_UPSTREAM_CONF}" 2>/dev/null || \
echo "⚠️ Could not update nginx config (need sudo). Run: sudo $0"
fi
# Test and reload nginx if config was updated
if [[ -f "$NGINX_UPSTREAM_CONF" ]]; then
if nginx -t 2>/dev/null || sudo nginx -t 2>/dev/null; then
echo "→ Reloading nginx…"
nginx -s reload 2>/dev/null || sudo nginx -s reload 2>/dev/null || \
echo "⚠️ Could not reload nginx (may need sudo)"
else
echo "⚠️ Nginx configuration test failed"
fi
fi
else
echo "⚠️ Cannot write to nginx config directory (need sudo privileges)"
echo " Run: sudo $0 to update nginx configuration"
fi
fi
fi
done
disown -a