Reorganize model_server

2026-06-23 15:38:07 +02:00 · 2024-12-08 09:21:53 -08:00 · 2024-12-08 09:21:53 -08:00 · b4f4695f16
commit b4f4695f16
parent a40cdc7b75
20 changed files with 20 additions and 20 deletions
--- a/model_server/src/commons/init.py
+++ b/model_server/src/commons/init.py
--- a/model_server/src/commons/constants.py
+++ b/model_server/src/commons/constants.py
@ -0,0 +1,79 @@
+# ========================== Arch-Intent Default Params ==========================
+# Defaults for the "Arch-Intent" handler. In commons/globals.py these values are
+# passed positionally to ArchIntentHandler in the order: model alias, task prompt,
+# tool prompt template, format prompt, instruction.
+ARCH_INTENT_MODEL_ALIAS = "Arch-Intent"
+ARCH_INTENT_INSTRUCTION = "Are there any tools can help?"
+
+# Task (system-style) prompt text. Note the string starts and ends with a newline.
+ARCH_INTENT_TASK_PROMPT = """
+You are a helpful assistant.
+"""
+
+
+# Template with a single `{tool_text}` placeholder.
+# Presumably filled via str.format with the serialized tool list -- TODO confirm in
+# the handler implementation.
+ARCH_INTENT_TOOL_PROMPT_TEMPLATE = """
+You task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.
+
+<tools>
+{tool_text}
+</tools>
+"""
+
+
+# Output-format instructions: first line 'Yes'/'No', optional second line of
+# comma-separated tool indexes.
+ARCH_INTENT_FORMAT_PROMPT = """
+Provide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:
+- First line must read 'Yes' or 'No'.
+- If yes, a second line must include a comma-separated list of tool indexes.
+"""
+
+
+# Unpacked as keyword arguments (`generation_params=...`) into ArchIntentHandler.
+# NOTE(review): max_tokens is 1, so only a single token can be generated even
+# though the format prompt describes an optional second line -- confirm intended.
+# NOTE(review): 151645 is presumably the model's end-of-turn token id -- TODO confirm.
+ARCH_INTENT_GENERATION_CONFIG = {
+    "generation_params": {"max_tokens": 1, "stop_token_ids": [151645]}
+}
+
+
+# ========================== Arch-Function Default Params ==========================
+# Defaults for the "Arch-Function" handler. In commons/globals.py these values are
+# passed positionally to ArchFunctionHandler in the order: model alias, task prompt,
+# tool prompt template, format prompt.
+ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function"
+
+# Task (system-style) prompt text. Note the string starts and ends with a newline.
+ARCH_FUNCTION_TASK_PROMPT = """
+You are a helpful assistant.
+"""
+
+
+# Template with a single `{tool_text}` placeholder inside <tools></tools> tags.
+# Presumably filled via str.format with the function signatures -- TODO confirm in
+# the handler implementation.
+ARCH_FUNCTION_TOOL_PROMPT_TEMPLATE = """
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{tool_text}
+</tools>
+"""
+
+
+# Describes the expected <tool_call> JSON output format.
+# NOTE(review): this string contains literal `{` / `}` characters, so it must not
+# be passed through str.format as-is -- verify against the handler.
+ARCH_FUNCTION_FORMAT_PROMPT = """
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+"""
+
+# Unpacked as keyword arguments into ArchFunctionHandler, i.e. the handler receives
+# `generation_params`, `prefill_params` and `prefill_prefix`.
+ARCH_FUNCTION_GENERATION_CONFIG = {
+    # Sampling settings for generation.
+    # NOTE(review): 151645 is presumably the model's end-of-turn token id -- TODO confirm.
+    "generation_params": {
+        "temperature": 0.2,
+        "top_p": 1.0,
+        "top_k": 50,
+        "max_tokens": 512,
+        "stop_token_ids": [151645],
+    },
+    # Flags used when prefilling; presumably forwarded to the chat-template /
+    # completion request so the final message is continued -- TODO confirm.
+    "prefill_params": {
+        "continue_final_message": True,
+        "add_generation_prompt": False,
+    },
+    # Candidate opening words; presumably used as response prefixes when
+    # prefilling -- verify against the handler.
+    "prefill_prefix": [
+        "May",
+        "Could",
+        "Sure",
+        "Definitely",
+        "Certainly",
+        "Of course",
+        "Can",
+    ],
+}
--- a/model_server/src/commons/globals.py
+++ b/model_server/src/commons/globals.py
@ -0,0 +1,38 @@
+import src.commons.utilities as utils
+
+from openai import OpenAI
+from src.commons.constants import *
+from src.core.function_calling import ArchIntentHandler, ArchFunctionHandler
+from src.core.guardrails import get_guardrail_handler
+
+
+# Shared model-server logger, obtained at import time. It is not referenced again
+# in the visible part of this module.
+logger = utils.get_model_server_logger()
+
+
+# Define the client
+# OpenAI-compatible client pointed at the endpoint below; the API key is the
+# literal placeholder string "EMPTY".
+ARCH_ENDPOINT = "https://api.fc.archgw.com/v1"
+ARCH_API_KEY = "EMPTY"
+ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)
+
+
+# Define model handlers
+# Maps a handler name to a handler instance. All three handlers are constructed
+# when this module is imported. The "Arch-Intent" / "Arch-Function" keys are
+# string literals that currently equal ARCH_INTENT_MODEL_ALIAS /
+# ARCH_FUNCTION_MODEL_ALIAS from commons/constants.py.
+handler_map = {
+    # Positional args: client, model alias, task prompt, tool prompt template,
+    # format prompt, instruction; the generation config dict is unpacked as
+    # keyword arguments (`generation_params`).
+    "Arch-Intent": ArchIntentHandler(
+        ARCH_CLIENT,
+        ARCH_INTENT_MODEL_ALIAS,
+        ARCH_INTENT_TASK_PROMPT,
+        ARCH_INTENT_TOOL_PROMPT_TEMPLATE,
+        ARCH_INTENT_FORMAT_PROMPT,
+        ARCH_INTENT_INSTRUCTION,
+        **ARCH_INTENT_GENERATION_CONFIG,
+    ),
+    # Same positional layout minus the instruction; the config dict is unpacked
+    # as `generation_params`, `prefill_params` and `prefill_prefix`.
+    "Arch-Function": ArchFunctionHandler(
+        ARCH_CLIENT,
+        ARCH_FUNCTION_MODEL_ALIAS,
+        ARCH_FUNCTION_TASK_PROMPT,
+        ARCH_FUNCTION_TOOL_PROMPT_TEMPLATE,
+        ARCH_FUNCTION_FORMAT_PROMPT,
+        **ARCH_FUNCTION_GENERATION_CONFIG,
+    ),
+    # Guardrail handler comes from a factory in src.core.guardrails.
+    "Arch-Guard": get_guardrail_handler(),
+}
--- a/model_server/src/commons/utilities.py
+++ b/model_server/src/commons/utilities.py
@ -0,0 +1,65 @@
+import os
+import torch
+import logging
+
+
+logger_instance = None
+
+
+def get_device():
+    """Return the name of the preferred torch device.
+
+    Preference order is CUDA, then Apple MPS, then CPU.
+
+    Returns:
+        str: One of ``"cuda"``, ``"mps"`` or ``"cpu"``.
+    """
+    if torch.cuda.is_available():
+        return "cuda"
+
+    # Older torch builds have no `torch.backends.mps` attribute at all.
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+
+    return "cpu"
+
+
+def get_model_server_logger():
+    """Return the shared model-server logger, setting up file logging on first use.
+
+    On the first successful call this creates ``~/archgw_logs`` if needed, calls
+    ``logging.basicConfig`` with a single ``FileHandler`` writing INFO-and-above
+    records to ``~/archgw_logs/modelserver.log`` (opened in ``"w"`` mode, so an
+    existing file is overwritten), and caches the ``"model_server_logger"``
+    logger in the module-level ``logger_instance``. Later calls return the
+    cached instance without touching the file system.
+
+    Returns:
+        logging.Logger: The logger named ``"model_server_logger"``.
+
+    Raises:
+        RuntimeError: If the log directory cannot be created or is not
+            writable, or the log file cannot be opened. The underlying
+            ``OSError`` is attached as ``__cause__``.
+    """
+    global logger_instance
+
+    if logger_instance is not None:
+        # Already initialized by an earlier call; reuse the existing instance.
+        return logger_instance
+
+    # Log file lives outside the current directory, under the user's home.
+    log_dir = os.path.expanduser("~/archgw_logs")
+    log_file_path = os.path.join(log_dir, "modelserver.log")
+
+    try:
+        # exist_ok=True already covers the "directory exists" case, so no
+        # separate (race-prone) os.path.exists() probe is needed.
+        os.makedirs(log_dir, exist_ok=True)
+
+        # Fail early with a clear error if the directory is not writable.
+        if not os.access(log_dir, os.W_OK):
+            raise PermissionError(f"No write permission for the directory: {log_dir}")
+
+        # Configure file-only logging via basicConfig.
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            handlers=[
+                logging.FileHandler(log_file_path, mode="w"),  # Overwrite logs in file
+            ],
+        )
+    except OSError as err:
+        # PermissionError is a subclass of OSError, so this one clause covers both.
+        # Deliberately do not fall back to console logging; surface the failure
+        # and keep the original error as the explicit cause for debugging.
+        raise RuntimeError(f"No write permission for the directory: {log_dir}") from err
+
+    # Cache the logger only after the handlers were configured successfully.
+    logger_instance = logging.getLogger("model_server_logger")
+    return logger_instance