address cmt

This commit is contained in:
cotran 2024-11-07 11:15:03 -08:00
parent dd07ba2cd0
commit 1f383eafc4
4 changed files with 27 additions and 23 deletions

View file

@ -8,8 +8,9 @@ from app.prompt_guard.model_handler import ArchGuardHanlder
# Logger for this module, obtained from the project's utils helper.
logger = utils.get_model_server_logger()
# Module-level ArchFunctionHandler instance.
arch_function_hanlder = ArchFunctionHandler()
# NOTE(review): the two lowercase names below duplicate the uppercase constants
# that follow; they look like the pre-rename spellings — confirm whether both
# sets are meant to exist.
prefill_list = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
prefill_enabled = True
# Candidate opening words; the chat-completion handler picks one at random and
# appends it as an assistant message for the model to continue.
PREFILL_LIST = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
# When true, the handler requests a streamed completion and inspects the first
# token before deciding whether to prefill.
PREFILL_ENABLED = True
# First-token content that the handler treats as the start of a tool call.
TOOL_CALL_TOKEN = "<tool_call>"
# Endpoint URL for the arch function service, and the client built for it.
arch_function_endpoint = "https://api.fc.archgw.com/v1"
arch_function_client = utils.get_client(arch_function_endpoint)
arch_function_generation_params = {

View file

@ -87,14 +87,14 @@ async def chat_completion(req: ChatMessage, res: Response):
resp = const.arch_function_client.chat.completions.create(
messages=messages,
model=client_model_name,
stream=const.prefill_enabled,
stream=const.PREFILL_ENABLED,
extra_body=const.arch_function_generation_params,
)
except Exception as e:
logger.error(f"model_server <= arch_function: error: {e}")
raise
if const.prefill_enabled:
if const.PREFILL_ENABLED:
first_token_content = ""
for token in resp:
first_token_content = token.choices[
@ -104,14 +104,16 @@ async def chat_completion(req: ChatMessage, res: Response):
break
# Check if the first token requires tool call handling
if first_token_content != "<tool_call>":
if first_token_content != const.TOOL_CALL_TOKEN:
# Engage pre-filling response if no tool call is indicated
resp.close()
logger.info("Tool call is not found! Engage pre filling")
prefill_content = random.choice(const.prefill_list)
prefill_content = random.choice(const.PREFILL_LIST)
messages.append({"role": "assistant", "content": prefill_content})
# Send a new completion request with the updated messages.
# The model will continue the final message in the chat instead of starting a new one.
# Disable add_generation_prompt, which tells the template to add tokens that indicate the start of a bot response.
extra_body = {
**const.arch_function_generation_params,
"continue_final_message": True,