"""Shared constants and module-level singletons for the model server.

Importing this module has side effects: it obtains the server logger,
instantiates the function-calling handler, creates a client for the
function-calling endpoint, and loads the embedding, zero-shot and
prompt-guard models through ``app.loader``.
"""

from enum import Enum

import app.commons.globals as glb
import app.commons.utilities as utils
import app.loader as loader
from app.function_calling.model_handler import ArchFunctionHandler
from app.prompt_guard.model_handler import ArchGuardHanlder


logger = utils.get_model_server_logger()

# NOTE: the misspelling ("hanlder") is part of the public name and is kept
# as-is so that existing references to this attribute keep working.
arch_function_hanlder = ArchFunctionHandler()

# Prefill settings for the function-calling model.
PREFILL_LIST = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
PREFILL_ENABLED = True

# NOTE(review): this is an empty string as written. If a literal marker tag
# was intended here, confirm the expected value against the model's output
# format before relying on this constant.
TOOL_CALL_TOKEN = ""

# Function-calling endpoint, its client, and the generation parameters.
arch_function_endpoint = "https://api.fc.archgw.com/v1"
arch_function_client = utils.get_client(arch_function_endpoint)
arch_function_generation_params = {
    "temperature": 0.2,
    "top_p": 1.0,
    "top_k": 50,
    "max_tokens": 512,
    # NOTE(review): presumably the model's end-of-turn token id -- confirm.
    "stop_token_ids": [151645],
    # "top_logprobs": 10,
}

# Prompt-guard model name to load for each device type (keyed by glb.DEVICE).
arch_guard_model_type = {
    "cpu": "katanemo/Arch-Guard-cpu",
    "cuda": "katanemo/Arch-Guard",
    "mps": "katanemo/Arch-Guard",
}

# Model definition
embedding_model = loader.get_embedding_model()
zero_shot_model = loader.get_zero_shot_model()

# Raises KeyError if glb.DEVICE is not one of the keys of arch_guard_model_type.
prompt_guard_dict = loader.get_prompt_guard(arch_guard_model_type[glb.DEVICE])

arch_guard_handler = ArchGuardHanlder(model_dict=prompt_guard_dict)

# Patterns for function name and parameter parsing.
# Each constant is a tuple holding a double-quoted and a single-quoted variant
# (plus extra terminators where listed).
FUNC_NAME_START_PATTERN = ('\n{"name":"', "\n{'name':'")
FUNC_NAME_END_TOKEN = ('",', "',")
FIRST_PARAM_NAME_START_PATTERN = ('"arguments":{"', "'arguments':{'")
PARAMETER_NAME_END_TOKENS = ('":', ':"', "':", ":'")
PARAMETER_NAME_START_PATTERN = (',"', ",'")
PARAMETER_VALUE_START_PATTERN = ('":', "':")
PARAMETER_VALUE_END_TOKEN = ('",', "}}\n", "',")


class MaskToken(Enum):
    """Single-character tags for the categories of generated tokens."""

    FUNCTION_NAME = "f"
    PARAMETER_VALUE = "v"
    PARAMETER_NAME = "p"
    NOT_USED = "e"
    TOOL_CALL = "t"


# Thresholds
# Entropy / varentropy limits keyed by MaskToken value. Only the tool-call and
# parameter-value categories have entries.
HALLUCINATION_THRESHOLD_DICT = {
    MaskToken.TOOL_CALL.value: {"entropy": 0.1, "varentropy": 0.5},
    MaskToken.PARAMETER_VALUE.value: {
        "entropy": 0.5,
        "varentropy": 2.5,
    },
}