From 13fac83381701521363c006803242e923c7fe837 Mon Sep 17 00:00:00 2001 From: cotran Date: Fri, 1 Nov 2024 10:43:34 -0700 Subject: [PATCH] address comments --- model_server/app/function_calling/model_utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/model_server/app/function_calling/model_utils.py b/model_server/app/function_calling/model_utils.py index e3ceea51..9c2da39b 100644 --- a/model_server/app/function_calling/model_utils.py +++ b/model_server/app/function_calling/model_utils.py @@ -96,6 +96,7 @@ async def chat_completion( except Exception as e: logger.error(f"model_server <= arch_function: error: {e}") raise + first_token_content = "" for token in resp: first_token_content = token.choices[ @@ -113,11 +114,16 @@ async def chat_completion( messages.append({"role": "assistant", "content": prefill_content}) # Send a new completion request with the updated messages + extra_body = { + **const.arch_function_generation_params, + "continue_final_message": True, + "add_generation_prompt": False, + } pre_fill_resp = const.arch_function_client.chat.completions.create( messages=messages, model=client_model_name, stream=False, - extra_body=const.arch_function_generation_params, + extra_body=extra_body, ) full_response = pre_fill_resp.choices[0].message.content else: