# Mirror of https://github.com/katanemo/plano.git
# Synced 2026-06-17 15:25:17 +02:00 (66 lines, 1.9 KiB, Python)
from enum import Enum
from enum import unique

import app.commons.globals as glb
import app.commons.utilities as utils
import app.loader as loader
from app.function_calling.model_handler import ArchFunctionHandler
from app.prompt_guard.model_handler import ArchGuardHanlder

# Module-wide logger, obtained from the shared utilities helper.
logger = utils.get_model_server_logger()

# Handler instance for the Arch-Function model, created once at import time.
# NOTE(review): "hanlder" is a typo, but the name is kept because other modules
# may reference it; prefer the correctly spelled alias in new code.
arch_function_hanlder = ArchFunctionHandler()
arch_function_handler = arch_function_hanlder

# Prefill configuration.
# NOTE(review): presumably these are candidate opening words for a prefilled
# response when PREFILL_ENABLED is true -- confirm against the handler code.
PREFILL_LIST = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
PREFILL_ENABLED = True

# Marker string that opens a tool-call section in generated text.
TOOL_CALL_TOKEN = "<tool_call>"

# Remote endpoint for the Arch-Function model and the client bound to it.
arch_function_endpoint = "https://api.fc.archgw.com/v1"
arch_function_client = utils.get_client(arch_function_endpoint)

# Generation parameters for the Arch-Function model.
# NOTE(review): 151645 is presumably the model's end-of-turn token id --
# confirm against the tokenizer in use.
arch_function_generation_params = {
    "temperature": 0.2,
    "top_p": 1.0,
    "top_k": 50,
    "max_tokens": 512,
    "stop_token_ids": [151645],
    # "top_logprobs": 10,  (disabled in the original; kept for reference)
}

# Maps a device name to the Arch-Guard model identifier to load for it.
# "cuda" and "mps" share one model; "cpu" uses a dedicated "-cpu" variant.
arch_guard_model_type = {
    "cpu": "katanemo/Arch-Guard-cpu",
    "cuda": "katanemo/Arch-Guard",
    "mps": "katanemo/Arch-Guard",
}

# Model definition
# The loader calls below run when this module is first imported.
embedding_model = loader.get_embedding_model()
zero_shot_model = loader.get_zero_shot_model()

# Pick the Arch-Guard model identifier registered for the configured device.
# A glb.DEVICE value that is not a key of arch_guard_model_type raises
# KeyError here, at import time.
prompt_guard_dict = loader.get_prompt_guard(arch_guard_model_type[glb.DEVICE])

# Guard handler built from the loaded prompt-guard model dictionary.
arch_guard_handler = ArchGuardHanlder(model_dict=prompt_guard_dict)

# Patterns for function name and parameter parsing.
# Each constant is a tuple of alternatives covering the double-quoted and
# single-quoted spellings of the same marker, so it can be passed directly to
# str.startswith / str.endswith.
# (TOOL_CALL_TOKEN is already defined earlier in this module with the same
# value, so the redundant re-assignment that used to sit here was dropped.)
FUNC_NAME_START_PATTERN = ('<tool_call>\n{"name":"', "<tool_call>\n{'name':'")
FUNC_NAME_END_TOKEN = ('",', "',")

FIRST_PARAM_NAME_START_PATTERN = ('"arguments":{"', "'arguments':{'")
PARAMETER_NAME_END_TOKENS = ('":', ':"', "':", ":'")
PARAMETER_NAME_START_PATTERN = (',"', ",'")
PARAMETER_VALUE_START_PATTERN = ('":', "':")
PARAMETER_VALUE_END_TOKEN = ('",', "}}\n", "',")

# Thresholds
@unique
class MaskToken(Enum):
    """Single-character codes for the categories a generated token can fall in.

    The ``value`` of ``TOOL_CALL`` and ``PARAMETER_VALUE`` is used as a key of
    ``HALLUCINATION_THRESHOLD_DICT``. ``@unique`` ensures no two members can
    ever share a code.
    """

    FUNCTION_NAME = "f"
    PARAMETER_VALUE = "v"
    PARAMETER_NAME = "p"
    NOT_USED = "e"
    TOOL_CALL = "t"


# Entropy / varentropy thresholds keyed by MaskToken value. Only the TOOL_CALL
# and PARAMETER_VALUE categories have entries.
# NOTE(review): presumably values above these limits flag a hallucination --
# confirm against the code that reads this table.
HALLUCINATION_THRESHOLD_DICT = {
    MaskToken.TOOL_CALL.value: {"entropy": 0.1, "varentropy": 0.5},
    MaskToken.PARAMETER_VALUE.value: {
        "entropy": 0.5,
        "varentropy": 2.5,
    },
}