ensure that we can call the new api.fc.archgw.com url, logging fixes … (#142)

* ensure that we can call the new api.fc.archgw.com url, logging fixes and minor cli bug fixes
* fixed a bug where model_server printed on terminal after start script stopped running
* updating the logo and fixing the website styles
* updated the branch with feedback from Co and Adil

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-06-14 15:15:15 +02:00 · 2024-10-08 12:40:24 -07:00 · 2024-10-08 12:40:24 -07:00 · 3ed50e61d2
commit 3ed50e61d2
parent 82fc91495e
11 changed files with 70 additions and 47 deletions
--- a/model_server/app/init.py
+++ b/model_server/app/init.py
@ -38,6 +38,9 @@ def start_server():
    print(f"Starting Archgw Model Server")
    process = subprocess.Popen(
        ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"],
+        start_new_session=True,
+        stdout=subprocess.DEVNULL,  # Suppress standard output. There is a logger that model_server prints to
+        stderr=subprocess.DEVNULL,  # Suppress standard error. There is a logger that model_server prints to
    )

    if wait_for_health_check("http://0.0.0.0:51000/healthz"):
--- a/model_server/app/arch_fc/arch_fc.py
+++ b/model_server/app/arch_fc/arch_fc.py
@ -4,21 +4,17 @@ from fastapi import FastAPI, Response
 from .common import ChatMessage, Message
 from .arch_handler import ArchHandler
 from .bolt_handler import BoltHandler
-from app.utils import load_yaml_config
-import logging
-import yaml
+from app.utils import load_yaml_config, get_model_server_logger
 from openai import OpenAI
 import os
 import hashlib

-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
+logger = get_model_server_logger()
+
 params = load_yaml_config("openai_params.yaml")
 ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
 ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
-fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
+fc_url = os.getenv("FC_URL", "https://api.fc.archgw.com/v1")

 mode = os.getenv("MODE", "cloud")
 if mode not in ["cloud", "local-gpu", "local-cpu"]:
@ -52,7 +48,7 @@ logger.info(f"using endpoint: {endpoint}")


 def process_state(arch_state, history: list[Message]):
-    print("state: {}".format(arch_state))
+    logger.info("state: {}".format(arch_state))
    state_json = json.loads(arch_state)

    state_map = {}
@ -61,7 +57,7 @@ def process_state(arch_state, history: list[Message]):
            for tool_state in tools_state:
                state_map[tool_state["key"]] = tool_state

-    print(f"state_map: {json.dumps(state_map)}")
+    logger.info(f"state_map: {json.dumps(state_map)}")

    sha_history = []
    updated_history = []
@ -73,7 +69,7 @@ def process_state(arch_state, history: list[Message]):
            joined_key_str = ("#.#").join(sha_history)
            sha256_hash.update(joined_key_str.encode())
            sha_key = sha256_hash.hexdigest()
-            print(f"sha_key: {sha_key}")
+            logger.info(f"sha_key: {sha_key}")
            if sha_key in state_map:
                tool_call_state = state_map[sha_key]
                if "tool_call" in tool_call_state:
--- a/model_server/app/install.py
+++ b/model_server/app/install.py
@ -1,13 +0,0 @@
-from load_models import (
-    load_transformers,
-    load_ner_models,
-    load_toxic_model,
-    load_jailbreak_model,
-)
-
-print("installing transformers")
-load_transformers()
-print("installing ner models")
-load_ner_models()
-print("installing jailbreak models")
-load_jailbreak_model()
--- a/model_server/app/main.py
+++ b/model_server/app/main.py
@ -8,7 +8,7 @@ from app.load_models import (
    get_device,
 )
 import os
-from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config
+from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config, get_model_server_logger
 import torch
 import yaml
 import string
@ -17,11 +17,10 @@ import logging
 from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
 import os.path

-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-logger.info("Device used: " + get_device())
+
+logger = get_model_server_logger()
+logger.info(f"Devices Avialble: {get_device()}")
+
 transformers = load_transformers()
 zero_shot_models = load_zero_shot_models()
 guard_model_config = load_yaml_config("guard_model_config.yaml")
@ -40,7 +39,6 @@ guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model)

 app = FastAPI()

-
 class EmbeddingRequest(BaseModel):
    input: str
    model: str
@ -63,12 +61,13 @@ async def models():

@app.post("/embeddings")
 async def embedding(req: EmbeddingRequest, res: Response):
-    print(f"Embedding Call Start Time: {time.time()}")
+
    if req.model not in transformers:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)
+
    start = time.time()
    embeddings = transformers[req.model].encode([req.input])
-    print(f"Embedding Call Complete Time: {time.time()-start}")
+    logger.info(f"Embedding Call Complete Time: {time.time()-start}")
    data = []

    for embedding in embeddings.tolist():
@ -78,10 +77,8 @@ async def embedding(req: EmbeddingRequest, res: Response):
        "prompt_tokens": 0,
        "total_tokens": 0,
    }
-    print(f"Embedding Call Complete Time: {time.time()}")
    return {"data": data, "model": req.model, "object": "list", "usage": usage}

-
 class GuardRequest(BaseModel):
    input: str
    task: str
--- a/model_server/app/utils.py
+++ b/model_server/app/utils.py
@ -4,7 +4,10 @@ import time
 import torch
 import pkg_resources
 import yaml
+import os
+import logging

+logger_instance = None

 def load_yaml_config(file_name):
    # Load the YAML file from the package
@ -134,3 +137,40 @@ class GuardHandler:
                f"{self.task}_sentence": sentence,
            }
        return result_dict
+
+def get_model_server_logger():
+    """Return the shared model-server logger, setting up file logging on first call."""
+    global logger_instance
+    if logger_instance is not None:
+        # Already initialized by an earlier call; return the cached instance
+        return logger_instance
+
+    # Define log file path outside current directory (~/archgw_logs/modelserver.log)
+    log_dir = os.path.expanduser("~/archgw_logs")
+    log_file = "modelserver.log"
+    log_file_path = os.path.join(log_dir, log_file)
+
+    # Ensure the log directory exists and is writable; any failure becomes RuntimeError
+    try:
+        if not os.path.exists(log_dir):
+            os.makedirs(log_dir, exist_ok=True)  # Create directory if it doesn't exist
+
+        # Check if the script has write permission in the log directory
+        if not os.access(log_dir, os.W_OK):
+            raise PermissionError(f"No write permission for the directory: {log_dir}")
+
+        # Configure logging to a file only (no console handler) using basicConfig
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            handlers=[
+                logging.FileHandler(log_file_path, mode='w'),  # Overwrite logs in file
+            ]
+        )
+    except (PermissionError, OSError) as e:
+        # Don't fall back to console logging. NOTE(review): every OSError gets this fixed message and `e` is unused
+        raise RuntimeError(f"No write permission for the directory: {log_dir}")
+
+    # Initialize the logger instance after configuring handlers
+    logger_instance = logging.getLogger("model_server_logger")
+    return logger_instance