diff --git a/arch/download_mistral_7b.sh b/arch/download_mistral_7b.sh deleted file mode 100644 index f3b682ec..00000000 --- a/arch/download_mistral_7b.sh +++ /dev/null @@ -1 +0,0 @@ -huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False diff --git a/arch/tools/cli.py b/arch/tools/cli.py index 41c4d1db..50e4f03e 100644 --- a/arch/tools/cli.py +++ b/arch/tools/cli.py @@ -90,6 +90,7 @@ def up(file, path): # Set the ARCH_CONFIG_FILE environment variable env_stage = {} + env = os.environ.copy() #check if access_keys are preesnt in the config file access_keys = get_llm_provider_access_keys(arch_config_file=arch_config_file) if access_keys: @@ -118,7 +119,6 @@ def up(file, path): for key, value in env_stage.items(): file.write(f"{key}={value}\n") - env = os.environ.copy() env.update(env_stage) env['ARCH_CONFIG_FILE'] = arch_config_file diff --git a/arch/tools/core.py b/arch/tools/core.py index ac658b76..f4732b92 100644 --- a/arch/tools/core.py +++ b/arch/tools/core.py @@ -105,6 +105,7 @@ def start_arch_modelserver(): subprocess.run( ['archgw_modelserver', 'restart'], check=True, + start_new_session=True ) print("Successfull run the archgw model_server") except subprocess.CalledProcessError as e: diff --git a/arch/tools/utils.py b/arch/tools/utils.py index 910b12da..eb72870c 100644 --- a/arch/tools/utils.py +++ b/arch/tools/utils.py @@ -21,6 +21,7 @@ def run_docker_compose_ps(compose_file, env): stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + start_new_session=True, env=env ) # Capture the output of `docker-compose ps` diff --git a/docs/source/_static/img/arch-logo.png b/docs/source/_static/img/arch-logo.png index ea769eca..183f6ee9 100644 Binary files a/docs/source/_static/img/arch-logo.png and b/docs/source/_static/img/arch-logo.png differ diff --git a/model_server/app/__init__.py b/model_server/app/__init__.py index 59e9ab48..7c40bad4 100644 --- 
a/model_server/app/__init__.py +++ b/model_server/app/__init__.py @@ -38,6 +38,9 @@ def start_server(): print(f"Starting Archgw Model Server") process = subprocess.Popen( ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"], + start_new_session=True, + stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to + stderr=subprocess.DEVNULL, # Suppress standard error. There is a logger that model_server prints to ) if wait_for_health_check("http://0.0.0.0:51000/healthz"): diff --git a/model_server/app/arch_fc/arch_fc.py b/model_server/app/arch_fc/arch_fc.py index ae3ad231..a2de43f7 100644 --- a/model_server/app/arch_fc/arch_fc.py +++ b/model_server/app/arch_fc/arch_fc.py @@ -4,21 +4,17 @@ from fastapi import FastAPI, Response from .common import ChatMessage, Message from .arch_handler import ArchHandler from .bolt_handler import BoltHandler -from app.utils import load_yaml_config -import logging -import yaml +from app.utils import load_yaml_config, get_model_server_logger from openai import OpenAI import os import hashlib -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) +logger = get_model_server_logger() + params = load_yaml_config("openai_params.yaml") ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost") ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M") -fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1") +fc_url = os.getenv("FC_URL", "https://api.fc.archgw.com/v1") mode = os.getenv("MODE", "cloud") if mode not in ["cloud", "local-gpu", "local-cpu"]: @@ -52,7 +48,7 @@ logger.info(f"using endpoint: {endpoint}") def process_state(arch_state, history: list[Message]): - print("state: {}".format(arch_state)) + logger.info("state: {}".format(arch_state)) state_json = json.loads(arch_state) state_map = {} @@ -61,7 +57,7 @@ def process_state(arch_state, history: 
list[Message]): for tool_state in tools_state: state_map[tool_state["key"]] = tool_state - print(f"state_map: {json.dumps(state_map)}") + logger.info(f"state_map: {json.dumps(state_map)}") sha_history = [] updated_history = [] @@ -73,7 +69,7 @@ def process_state(arch_state, history: list[Message]): joined_key_str = ("#.#").join(sha_history) sha256_hash.update(joined_key_str.encode()) sha_key = sha256_hash.hexdigest() - print(f"sha_key: {sha_key}") + logger.info(f"sha_key: {sha_key}") if sha_key in state_map: tool_call_state = state_map[sha_key] if "tool_call" in tool_call_state: diff --git a/model_server/app/install.py b/model_server/app/install.py deleted file mode 100644 index e50e1d35..00000000 --- a/model_server/app/install.py +++ /dev/null @@ -1,13 +0,0 @@ -from load_models import ( - load_transformers, - load_ner_models, - load_toxic_model, - load_jailbreak_model, -) - -print("installing transformers") -load_transformers() -print("installing ner models") -load_ner_models() -print("installing jailbreak models") -load_jailbreak_model() diff --git a/model_server/app/main.py b/model_server/app/main.py index 9f219eda..a77a9ab3 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -8,7 +8,7 @@ from app.load_models import ( get_device, ) import os -from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config +from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config, get_model_server_logger import torch import yaml import string @@ -17,11 +17,10 @@ import logging from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage import os.path -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) -logger.info("Device used: " + get_device()) + +logger = get_model_server_logger() +logger.info(f"Devices Avialble: {get_device()}") + transformers = load_transformers() zero_shot_models = load_zero_shot_models() 
guard_model_config = load_yaml_config("guard_model_config.yaml") @@ -40,7 +39,6 @@ guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model) app = FastAPI() - class EmbeddingRequest(BaseModel): input: str model: str @@ -63,12 +61,13 @@ async def models(): @app.post("/embeddings") async def embedding(req: EmbeddingRequest, res: Response): - print(f"Embedding Call Start Time: {time.time()}") + if req.model not in transformers: raise HTTPException(status_code=400, detail="unknown model: " + req.model) + start = time.time() embeddings = transformers[req.model].encode([req.input]) - print(f"Embedding Call Complete Time: {time.time()-start}") + logger.info(f"Embedding Call Complete Time: {time.time()-start}") data = [] for embedding in embeddings.tolist(): @@ -78,10 +77,8 @@ async def embedding(req: EmbeddingRequest, res: Response): "prompt_tokens": 0, "total_tokens": 0, } - print(f"Embedding Call Complete Time: {time.time()}") return {"data": data, "model": req.model, "object": "list", "usage": usage} - class GuardRequest(BaseModel): input: str task: str diff --git a/model_server/app/utils.py b/model_server/app/utils.py index d7d9d8e0..2a3fe5c0 100644 --- a/model_server/app/utils.py +++ b/model_server/app/utils.py @@ -4,7 +4,10 @@ import time import torch import pkg_resources import yaml +import os +import logging +logger_instance = None def load_yaml_config(file_name): # Load the YAML file from the package @@ -134,3 +137,40 @@ class GuardHandler: f"{self.task}_sentence": sentence, } return result_dict + +def get_model_server_logger(): + global logger_instance + + if logger_instance is not None: + # If the logger is already initialized, return the existing instance + return logger_instance + + # Define log file path outside current directory (e.g., ~/archgw_logs) + log_dir = os.path.expanduser("~/archgw_logs") + log_file = "modelserver.log" + log_file_path = os.path.join(log_dir, log_file) + + # Ensure the log directory exists, create it if 
necessary, handle permissions errors + try: + if not os.path.exists(log_dir): + os.makedirs(log_dir, exist_ok=True) # Create directory if it doesn't exist + + # Check if the script has write permission in the log directory + if not os.access(log_dir, os.W_OK): + raise PermissionError(f"No write permission for the directory: {log_dir}") + # Configure logging to the log file only (no console handler) using basicConfig + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[ + logging.FileHandler(log_file_path, mode='w'), # Overwrite logs in file + ] + ) + except (PermissionError, OSError) as e: + # Don't fall back to console logging if there are issues writing to the log file + raise RuntimeError(f"No write permission for the directory: {log_dir}") + + # Initialize the logger instance after configuring handlers + logger_instance = logging.getLogger("model_server_logger") + return logger_instance diff --git a/www/index.html b/www/index.html index b606bf4c..91735839 100644 --- a/www/index.html +++ b/www/index.html @@ -57,11 +57,10 @@ height: auto; display: block; } - h2.bold-text { - font-weight: bold; + div.bold-text { font-size: 1.5rem; - margin-bottom: 10px; - line-height: 2rem; + margin-bottom: 5px; + line-height: 3rem; } .subheading { font-size: 1rem; @@ -173,8 +172,8 @@