From 13fac83381701521363c006803242e923c7fe837 Mon Sep 17 00:00:00 2001
From: cotran <cotran2@utexas.edu>
Date: Fri, 1 Nov 2024 10:43:34 -0700
Subject: [PATCH] address comments: set continue_final_message for prefill request

---
 model_server/app/function_calling/model_utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/model_server/app/function_calling/model_utils.py b/model_server/app/function_calling/model_utils.py
index e3ceea51..9c2da39b 100644
--- a/model_server/app/function_calling/model_utils.py
+++ b/model_server/app/function_calling/model_utils.py
@@ -96,6 +96,7 @@ async def chat_completion(
         except Exception as e:
             logger.error(f"model_server <= arch_function: error: {e}")
             raise
+
         first_token_content = ""
         for token in resp:
             first_token_content = token.choices[
@@ -113,11 +114,16 @@ async def chat_completion(
             messages.append({"role": "assistant", "content": prefill_content})
 
             # Send a new completion request with the updated messages
+            extra_body = {
+                **const.arch_function_generation_params,
+                "continue_final_message": True,
+                "add_generation_prompt": False,
+            }
             pre_fill_resp = const.arch_function_client.chat.completions.create(
                 messages=messages,
                 model=client_model_name,
                 stream=False,
-                extra_body=const.arch_function_generation_params,
+                extra_body=extra_body,
             )
             full_response = pre_fill_resp.choices[0].message.content
         else: