mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
Reorganize model_server
This commit is contained in:
parent
a40cdc7b75
commit
b4f4695f16
20 changed files with 20 additions and 20 deletions
0
model_server/src/commons/__init__.py
Normal file
0
model_server/src/commons/__init__.py
Normal file
79
model_server/src/commons/constants.py
Normal file
79
model_server/src/commons/constants.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# ========================== Arch-Intent Default Params ==========================
# Model alias and the instruction text used for intent detection.
ARCH_INTENT_MODEL_ALIAS = "Arch-Intent"
ARCH_INTENT_INSTRUCTION = "Are there any tools can help?"

# Task prompt for the intent model.
ARCH_INTENT_TASK_PROMPT = """
You are a helpful assistant.
"""


# Tool prompt template; `{tool_text}` is the only placeholder.
ARCH_INTENT_TOOL_PROMPT_TEMPLATE = """
You task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.

<tools>
{tool_text}
</tools>
"""


# Output-format instructions: a 'Yes'/'No' first line, then optional tool indexes.
ARCH_INTENT_FORMAT_PROMPT = """
Provide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:
- First line must read 'Yes' or 'No'.
- If yes, a second line must include a comma-separated list of tool indexes.
"""


# Generation settings unpacked as keyword arguments into the intent handler.
# NOTE(review): 151645 is presumably the model's end-of-turn token id - confirm.
ARCH_INTENT_GENERATION_CONFIG = {
    "generation_params": {
        "max_tokens": 1,
        "stop_token_ids": [151645],
    },
}
|
||||
|
||||
|
||||
# ========================== Arch-Function Default Params ==========================
# Model alias used for function calling.
ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function"

# Task prompt for the function-calling model.
ARCH_FUNCTION_TASK_PROMPT = """
You are a helpful assistant.
"""


# Tool prompt template; `{tool_text}` is the only placeholder.
ARCH_FUNCTION_TOOL_PROMPT_TEMPLATE = """
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{tool_text}
</tools>
"""


# Output-format instructions for tool calls.
# NOTE(review): this text contains literal `{` / `}` characters, so it would
# break if passed through `str.format` - confirm it is only used verbatim.
ARCH_FUNCTION_FORMAT_PROMPT = """
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
"""

# Settings unpacked as keyword arguments into the function-calling handler.
ARCH_FUNCTION_GENERATION_CONFIG = {
    # Sampling parameters.
    # NOTE(review): 151645 is presumably the model's end-of-turn token id - confirm.
    "generation_params": {
        "temperature": 0.2,
        "top_p": 1.0,
        "top_k": 50,
        "max_tokens": 512,
        "stop_token_ids": [151645],
    },
    # Parameters used when prefilling (continuing) the final message.
    "prefill_params": {
        "continue_final_message": True,
        "add_generation_prompt": False,
    },
    # Leading words associated with prefilling; how they are consumed is
    # decided by the handler, which is not visible here.
    "prefill_prefix": [
        "May",
        "Could",
        "Sure",
        "Definitely",
        "Certainly",
        "Of course",
        "Can",
    ],
}
|
||||
38
model_server/src/commons/globals.py
Normal file
38
model_server/src/commons/globals.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import src.commons.utilities as utils
|
||||
|
||||
from openai import OpenAI
|
||||
from src.commons.constants import *
|
||||
from src.core.function_calling import ArchIntentHandler, ArchFunctionHandler
|
||||
from src.core.guardrails import get_guardrail_handler
|
||||
|
||||
|
||||
# Shared model-server logger, obtained once at import time.
logger = utils.get_model_server_logger()


# OpenAI-compatible client pointed at the Arch endpoint.
# NOTE(review): "EMPTY" looks like a placeholder API key - confirm the
# endpoint does not require a real credential.
ARCH_ENDPOINT = "https://api.fc.archgw.com/v1"
ARCH_API_KEY = "EMPTY"
ARCH_CLIENT = OpenAI(api_key=ARCH_API_KEY, base_url=ARCH_ENDPOINT)


# Registry of model handlers keyed by model name. Handlers are constructed
# eagerly, in this order, when the module is imported. Positional arguments
# are: client, model alias, task prompt, tool prompt template, format prompt
# (plus the instruction text for the intent handler); the generation config
# dicts are unpacked as keyword arguments.
handler_map = {
    "Arch-Intent": ArchIntentHandler(
        ARCH_CLIENT,
        ARCH_INTENT_MODEL_ALIAS,
        ARCH_INTENT_TASK_PROMPT,
        ARCH_INTENT_TOOL_PROMPT_TEMPLATE,
        ARCH_INTENT_FORMAT_PROMPT,
        ARCH_INTENT_INSTRUCTION,
        **ARCH_INTENT_GENERATION_CONFIG,
    ),
    "Arch-Function": ArchFunctionHandler(
        ARCH_CLIENT,
        ARCH_FUNCTION_MODEL_ALIAS,
        ARCH_FUNCTION_TASK_PROMPT,
        ARCH_FUNCTION_TOOL_PROMPT_TEMPLATE,
        ARCH_FUNCTION_FORMAT_PROMPT,
        **ARCH_FUNCTION_GENERATION_CONFIG,
    ),
    "Arch-Guard": get_guardrail_handler(),
}
|
||||
65
model_server/src/commons/utilities.py
Normal file
65
model_server/src/commons/utilities.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import os
|
||||
import torch
|
||||
import logging
|
||||
|
||||
|
||||
# Module-level cache for the logger built by get_model_server_logger();
# stays None until that function has completed successfully once.
logger_instance = None
|
||||
|
||||
|
||||
def get_device():
    """Pick the torch device string to run on.

    CUDA wins when ``torch.cuda.is_available()`` is true. Otherwise MPS is
    chosen when this torch build exposes ``torch.backends.mps`` and reports it
    as available. The CPU is the fallback.

    Returns:
        str: ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    # Both backends are probed up front, regardless of which one is selected.
    cuda_ok = torch.cuda.is_available()
    # Torch builds without an MPS backend lack the attribute entirely.
    mps_ok = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

    if cuda_ok:
        return "cuda"
    if mps_ok:
        return "mps"
    return "cpu"
|
||||
|
||||
|
||||
def get_model_server_logger():
    """Return the shared model-server logger, configuring logging on first use.

    The first successful call creates ``~/archgw_logs`` if needed, verifies it
    is writable, and calls ``logging.basicConfig`` with a single file handler
    that overwrites ``~/archgw_logs/modelserver.log``. The resulting logger is
    cached in the module-level ``logger_instance`` and returned by every later
    call. There is deliberately no console fallback: if the log file cannot be
    set up, the call fails.

    Returns:
        logging.Logger: the logger named ``"model_server_logger"``.

    Raises:
        RuntimeError: if the log directory cannot be created or is not
            writable, or the log file cannot be opened. The underlying
            ``OSError`` is attached as ``__cause__``.
    """
    global logger_instance

    # Reuse the logger created by an earlier call.
    if logger_instance is not None:
        return logger_instance

    # Keep logs outside the current directory, under the user's home.
    log_dir = os.path.expanduser("~/archgw_logs")
    log_file_path = os.path.join(log_dir, "modelserver.log")

    try:
        # exist_ok=True already covers "directory is there", so no separate
        # (race-prone) existence check is needed.
        os.makedirs(log_dir, exist_ok=True)

        # Fail early with a clear error when the directory is not writable.
        if not os.access(log_dir, os.W_OK):
            raise PermissionError(f"No write permission for the directory: {log_dir}")

        # File-only logging; mode="w" truncates the log file on each start.
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            handlers=[logging.FileHandler(log_file_path, mode="w")],
        )
    except PermissionError as err:
        raise RuntimeError(f"No write permission for the directory: {log_dir}") from err
    except OSError as err:
        # Other filesystem failures (e.g. a path component is a regular file)
        # are reported as what they are instead of as a permission problem.
        raise RuntimeError(
            f"Unable to set up the log file {log_file_path}: {err}"
        ) from err

    # Cache the logger only after the handlers were configured successfully.
    logger_instance = logging.getLogger("model_server_logger")
    return logger_instance
|
||||
Loading…
Add table
Add a link
Reference in a new issue