mirror of
https://github.com/katanemo/plano.git
synced 2026-04-28 10:26:36 +02:00
178 lines
5.6 KiB
Python
178 lines
5.6 KiB
Python
import importlib
|
|
import sys
|
|
import os
|
|
import time
|
|
import requests
|
|
import psutil
|
|
import tempfile
|
|
import subprocess
|
|
import logging
|
|
|
|
|
|
def get_version():
    """Return the installed version of the ``archgw_modelserver`` package.

    Returns:
        The version string reported by the package metadata, or the literal
        string ``"version not found"`` when the distribution is not installed.
    """
    # ``import importlib`` on its own does not guarantee that the ``metadata``
    # submodule has been loaded, so import it explicitly before using it.
    from importlib import metadata

    try:
        return metadata.version("archgw_modelserver")
    except metadata.PackageNotFoundError:
        return "version not found"
|
|
|
|
|
|
# Configure root logging once, at import time, so every record carries a
# timestamp, the logger name and the severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Dedicated logger used by the command-line entry points in this module.
log = logging.getLogger("model_server.cli")
log.setLevel(logging.INFO)

# Report the installed package version as soon as the module is imported.
log.info(f"model server version: {get_version()}")
|
|
|
|
|
|
def run_server(port=51000):
    """Run the action named by the first command-line argument.

    The action is taken from ``sys.argv[1]`` and falls back to ``"start"``
    when no argument was supplied. Recognized actions are ``start``, ``stop``
    and ``restart``; anything else is logged and the process exits with
    status 1.

    Args:
        port: Port passed through to the selected start/stop/restart function.
    """
    action = sys.argv[1] if len(sys.argv) > 1 else "start"

    # Map each supported action name to the function that implements it.
    handlers = {
        "start": start_server,
        "stop": stop_server,
        "restart": restart_server,
    }
    handler = handlers.get(action)

    if handler is None:
        log.info(f"Unknown action: {action}")
        sys.exit(1)
    else:
        handler(port)
|
|
|
|
|
|
def start_server(port=51000):
    """Start the Uvicorn server in a detached session and wait for it to be healthy.

    Launches ``uvicorn app.main:app`` bound to ``0.0.0.0:<port>`` as a child
    process in its own session, then polls the ``/healthz`` endpoint. If the
    health check never succeeds, the child is terminated (and killed if it
    ignores the termination request).

    Args:
        port: TCP port the server should listen on.
    """
    log.info(
        "starting model server - loading some awesomeness, this may take some time :)"
    )

    process = subprocess.Popen(
        [
            # Use the interpreter running this CLI so the child sees the same
            # environment; fall back to PATH lookup if it is unknown.
            sys.executable or "python",
            "-m",
            "uvicorn",
            "app.main:app",
            "--host",
            "0.0.0.0",
            "--port",
            f"{port}",
        ],
        start_new_session=True,
        # Suppress the child's output (the model server has its own logger).
        # DEVNULL is used instead of PIPE because nothing ever reads the pipes,
        # and an unread pipe eventually fills up and blocks the child.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # Probe via loopback: the server binds to all interfaces, but 0.0.0.0 is
    # not a portable address to connect *to*.
    if wait_for_health_check(f"http://127.0.0.1:{port}/healthz"):
        log.info(f"Model server started with PID {process.pid}")
    else:
        log.info("model server - didn't start in time, shutting down")
        process.terminate()
        # Reap the child so it does not linger holding the port; escalate to
        # SIGKILL if it does not exit after the polite request.
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
|
|
|
|
|
|
def wait_for_health_check(url, timeout=300, poll_interval=1, request_timeout=5):
    """Wait for the Uvicorn server to respond to health-check requests.

    Args:
        url: Health-check URL to poll with HTTP GET.
        timeout: Overall number of seconds to keep polling before giving up.
        poll_interval: Seconds to sleep between attempts.
        request_timeout: Per-request timeout in seconds, so a single hung
            request cannot stall the wait beyond the overall deadline.

    Returns:
        True as soon as a request returns HTTP 200, False if the overall
        timeout elapses first.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=request_timeout)
            if response.status_code == 200:
                return True
        except requests.RequestException:
            # Server not reachable / not answering yet: retry after a pause.
            pass
        # Sleep after every unsuccessful attempt (including non-200 replies)
        # so the loop never spins at full speed.
        time.sleep(poll_interval)

    print("Timed out waiting for model server to respond.")
    return False
|
|
|
|
|
|
def check_and_install_lsof():
    """Check if lsof is installed, and if not, install it using apt-get.

    The check runs ``lsof -v``. If the binary is missing or the command
    fails, ``sudo apt-get update`` followed by ``sudo apt-get install -y lsof``
    is attempted. Failures are reported on stdout and never raised, so callers
    can continue on a best-effort basis.
    """
    try:
        # Check if lsof is installed by running "lsof -v"
        subprocess.run(
            ["lsof", "-v"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except (OSError, subprocess.CalledProcessError):
        # A missing executable raises FileNotFoundError (an OSError), not
        # CalledProcessError, so both must lead to the install path.
        print("lsof not found, installing...")
    else:
        print("lsof is already installed.")
        return

    try:
        # Update package list and install lsof
        subprocess.run(["sudo", "apt-get", "update"], check=True)
        subprocess.run(["sudo", "apt-get", "install", "-y", "lsof"], check=True)
    except (OSError, subprocess.CalledProcessError) as install_error:
        # OSError covers hosts without sudo/apt-get on PATH.
        print(f"Failed to install lsof: {install_error}")
    else:
        print("lsof installed successfully.")
|
|
|
|
|
|
def _find_listening_pids(port):
    """Return the distinct PIDs (as strings) listening on TCP ``port``, per lsof."""
    # Let lsof do the filtering: -iTCP:<port> selects the port, -sTCP:LISTEN
    # keeps only listening sockets, and -t prints bare PIDs. Grepping the full
    # lsof table for the port digits can match unrelated lines (e.g. a PID
    # that happens to contain the same digits).
    result = subprocess.run(
        ["lsof", "-nP", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
        capture_output=True,
        text=True,
    )
    candidates = (line.strip() for line in result.stdout.splitlines())
    # dict.fromkeys de-duplicates while preserving order (a process may be
    # reported once per listening socket).
    return list(dict.fromkeys(pid for pid in candidates if pid.isdigit()))


def _wait_for_exit(pid, timeout):
    """Poll ``ps`` until ``pid`` is gone; send SIGKILL once ``timeout`` seconds pass."""
    start_time = time.time()

    while True:
        check_process = subprocess.run(
            ["ps", "-p", pid], capture_output=True, text=True
        )
        if check_process.returncode != 0:
            print(f"Process {pid} has been killed.")
            return

        elapsed_time = time.time() - start_time
        if elapsed_time > timeout:
            print(f"Process {pid} did not terminate within {timeout} seconds.")
            print(f"Attempting to force kill process {pid}...")
            subprocess.run(["kill", "-9", pid])  # SIGKILL
            return

        print(f"Waiting for process {pid} to be killed... ({elapsed_time:.2f} seconds)")
        time.sleep(0.5)


def kill_process(port=51000, wait=True, timeout=10):
    """Stop the running Uvicorn server.

    Finds every process listening on ``port`` with ``lsof`` and sends each one
    the default ``kill`` signal. Errors while running the external tools are
    printed rather than raised.

    Args:
        port: TCP port whose listeners should be terminated.
        wait: When true, wait for each process to exit before moving on.
        timeout: Seconds to wait per process before force-killing it with
            ``kill -9``.
    """
    log.info("Stopping model server")
    try:
        # Step 1: Ask lsof which processes are listening on the port
        process_ids = _find_listening_pids(port)

        if not process_ids:
            print(f"No process found listening on port {port}.")
            return

        # Step 2: Kill each process using its PID (argv lists, no shell)
        for pid in process_ids:
            print(f"Killing model server process with PID {pid}")
            subprocess.run(["kill", pid])

            # Step 3: Optionally wait for this process to actually go away
            if wait:
                _wait_for_exit(pid, timeout)

    except (OSError, subprocess.SubprocessError) as e:
        # Covers a missing lsof/kill/ps binary and other process-spawn errors.
        print(f"Error occurred: {e}")
|
|
|
|
|
|
def stop_server(port=51000, wait=True, timeout=10):
    """Stop the model server listening on ``port``.

    First runs the lsof availability check, then kills the process(es)
    listening on the port.

    Args:
        port: Port the server is listening on.
        wait: Forwarded to ``kill_process``.
        timeout: Forwarded to ``kill_process``.
    """
    check_and_install_lsof()
    kill_process(port=port, wait=wait, timeout=timeout)
|
|
|
|
|
|
def restart_server(port=51000):
    """Restart the Uvicorn server: stop the current instance, then start a new one.

    Args:
        port: Port used for both stopping the old server and starting the new one.
    """
    stop_server(port=port)
    start_server(port=port)
|