From 8871d3f751c2d3a6f2c273cb00c21fe373707148 Mon Sep 17 00:00:00 2001
From: Shuguang Chen <54548843+nehcgs@users.noreply.github.com>
Date: Mon, 9 Dec 2024 14:15:10 -0800
Subject: [PATCH 1/2] Collect debugging log

---
 e2e_tests/api_model_server.rest           | 56 ++++++++++++++++++++---
 model_server/src/core/function_calling.py | 23 ++++++++--
 model_server/src/core/hallucination.py    |  3 ++
 model_server/src/main.py                  | 24 ++++++++--
 4 files changed, 92 insertions(+), 14 deletions(-)

diff --git a/e2e_tests/api_model_server.rest b/e2e_tests/api_model_server.rest
index 79a7a0e5..7eeb6849 100644
--- a/e2e_tests/api_model_server.rest
+++ b/e2e_tests/api_model_server.rest
@@ -10,22 +10,66 @@ Content-Type: application/json
   "messages": [
     {
       "role": "user",
-      "content": "how is the weather in seattle for next 10 days"
+      "content": "what is the weather forcast for seattle in the next 10 days?"
     }
   ],
   "tools": [
     {
-        "id": "weather-112",
-        "tool_type": "function",
-        "function": {
-          "name": "weather_forecast",
-          "arguments": {"city": "str", "days": "int"}
+      "type": "function",
+      "function": {
+        "name": "weather_forecast",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "city": {
+              "type": "str"
+            },
+            "days": {
+              "type": "int"
+            }
+          },
+          "required": ["city", "days"]
         }
+      }
     }
   ]
 }
 
 
+
+### talk to function calling endpoint
+POST {{model_server_endpoint}}/function_calling HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "book a hotel for me"
+    }
+  ],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "weather_forecast",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "city": {
+              "type": "str"
+            },
+            "days": {
+              "type": "int"
+            }
+          },
+          "required": ["city", "days"]
+        }
+      }
+    }
+  ]
+}
+
 ### talk to Arch-Intent directly for completion
 POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1
 Content-Type: application/json
diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py
index ec21f0f4..27b9cc38 100644
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@@ -174,7 +174,7 @@ class ArchFunctionConfig:
     ).strip()
 
     GENERATION_PARAMS = {
-        "temperature": 0.2,
+        "temperature": 0.6,
         "top_p": 1.0,
         "top_k": 50,
         "max_tokens": 512,
@@ -482,7 +482,7 @@ class ArchFunctionHandler(ArchBaseHandler):
 
         for _ in self.hallu_handler:
             # check if the first token is <tool_call>
-            if len(self.hallu_handler.tokens) > 0 and has_tool_call == None:
+            if len(self.hallu_handler.tokens) > 0 and has_tool_call is None:
                 if self.hallu_handler.tokens[0] == "<tool_call>":
                     has_tool_call = True
                 else:
@@ -490,29 +490,42 @@ class ArchFunctionHandler(ArchBaseHandler):
                     break
 
             # if the model is hallucinating, start parameter gathering
-            if self.hallu_handler.hallucination == True:
+            if self.hallu_handler.hallucination is True:
+                # [TODO] - Review: remove the following code
+                print(
+                    f"Hallucination detected for the following response, start parameter gathering: \n{''.join(self.hallu_handler.tokens)}"
+                )
+
                 prefill_response = self._engage_parameter_gathering(messages)
                 model_response = prefill_response.choices[0].message.content
                 break
 
-        if has_tool_call and self.hallu_handler.hallucination == False:
+        if has_tool_call and self.hallu_handler.hallucination is False:
+            # [TODO] - Review: remove the following code
+            print("Tool call found, no hallucination detected!")
             model_response = "".join(self.hallu_handler.tokens)
 
         # start parameter gathering if the model is not generating tool calls
         if has_tool_call is False:
+            # [TODO] - Review: remove the following code
+            print("No tool call found, start parameter gathering")
             prefill_response = self._engage_parameter_gathering(messages)
             model_response = prefill_response.choices[0].message.content
 
         # Extract tool calls from model response
         extracted = self._extract_tool_calls(model_response)
+        # [TODO] - Review: remove the following code
+        print(f"[Extracted] - {extracted}")
 
-        if extracted["result"]:
+        if len(extracted["result"]) and extracted["status"]:
             # [TODO] Review: define the behavior in the case that tool call extraction fails
             # if not extracted["status"]:
 
             verified = self._verify_tool_calls(
                 tools=req.tools, tool_calls=extracted["result"]
             )
+            # [TODO] - Review: remove the following code
+            print(f"[Verified] - {verified}")
 
             # [TODO] Review: In the case that tool calls are invalid, define the protocol to collect debugging output and the behavior to handle it appropriately
             if verified["status"]:
diff --git a/model_server/src/core/hallucination.py b/model_server/src/core/hallucination.py
index 56b84713..3b42a688 100644
--- a/model_server/src/core/hallucination.py
+++ b/model_server/src/core/hallucination.py
@@ -278,6 +278,9 @@ class HallucinationStateHandler:
                 f"Hallucination: token '{self.tokens[-1]}' is uncertain."
             )
 
+            # [TODO] - Review: remove the following code
+            print(f"[Hallucination] - Hallucination detected: {self.error_message}")
+
     def _count_consecutive_token(self, token=MaskToken.PARAMETER_VALUE) -> int:
         """
         Counts the number of consecutive occurrences of a given token in the mask.
diff --git a/model_server/src/main.py b/model_server/src/main.py
index 0ca8c7c7..ef15ff78 100644
--- a/model_server/src/main.py
+++ b/model_server/src/main.py
@@ -1,4 +1,5 @@
 import os
+import time
 
 from src.commons.globals import handler_map
 from src.core.model_utils import ChatMessage, GuardRequest
@@ -54,22 +55,34 @@ async def models():
 @app.post("/function_calling")
 async def function_calling(req: ChatMessage, res: Response):
     try:
+        intent_start_time = time.perf_counter()
         intent_response = await handler_map["Arch-Intent"].chat_completion(req)
+        intent_latency = time.perf_counter() - intent_start_time
 
         if handler_map["Arch-Intent"].detect_intent(intent_response):
             # [TODO] measure agreement between intent detection and function calling
             try:
+                function_start_time = time.perf_counter()
                 function_calling_response = await handler_map[
                     "Arch-Function"
                 ].chat_completion(req)
-                return function_calling_response
+                function_latency = time.perf_counter() - function_start_time
+                return {
+                    "response": function_calling_response,
+                    "intent_latency": round(intent_latency * 1000, 3),
+                    "function_latency": round(function_latency * 1000, 3),
+                }
             except Exception as e:
                 # [TODO] Review: update how to collect debugging outputs
                 # logger.error(f"Error in chat_completion from `Arch-Function`: {e}")
                 res.status_code = 500
                 return {"error": f"[Arch-Function] - {e}"}
         # [TODO] Review: define the behavior if `Arch-Intent` doesn't detect an intent
-        # else:
+        else:
+            return {
+                "result": "No intent matched",
+                "intent_latency": round(intent_latency * 1000, 3),
+            }
 
     except Exception as e:
         # [TODO] Review: update how to collect debugging outputs
@@ -81,8 +94,13 @@ async def function_calling(req: ChatMessage, res: Response):
 @app.post("/guardrails")
 async def guardrails(req: GuardRequest, res: Response, max_num_words=300):
     try:
+        guard_start_time = time.perf_counter()
         guard_result = handler_map["Arch-Guard"].predict(req)
-        return guard_result
+        guard_latency = time.perf_counter() - guard_start_time
+        return {
+            "response": guard_result,
+            "guard_latency": round(guard_latency * 1000, 3),
+        }
     except Exception as e:
         # [TODO] Review: update how to collect debugging outputs
         res.status_code = 500

From 1635d44e4afacd488e8dd144293c93c3e9e76e02 Mon Sep 17 00:00:00 2001
From: Shuguang Chen <54548843+nehcgs@users.noreply.github.com>
Date: Mon, 9 Dec 2024 14:35:03 -0800
Subject: [PATCH 2/2] Update api_model_server.rest

---
 e2e_tests/api_model_server.rest | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/e2e_tests/api_model_server.rest b/e2e_tests/api_model_server.rest
index 7eeb6849..e6ad1530 100644
--- a/e2e_tests/api_model_server.rest
+++ b/e2e_tests/api_model_server.rest
@@ -79,7 +79,7 @@ Content-Type: application/json
   "messages": [
     {
       "role": "system",
-      "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n<tools>\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"description\": \"Get the current weather for a location\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"str\", \"description\": \"The city and state, e.g. San Francisco, New York\"}, \"unit\": {\"type\": \"str\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit of temperature to return\"}}, \"required\": [\"location\"]}}}\n</tools>\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n"
+      "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n<tools>\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n</tools>\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n"
     },
     { "role": "user", "content": "how is the weather in seattle? Are there any tools can help?" }
   ],
@@ -96,7 +96,7 @@ Content-Type: application/json
   "messages": [
     {
       "role": "system",
-      "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"id\": \"weather-112\", \"tool_type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"arguments\": {\"city\": \"str\", \"days\": \"int\"}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>"
+      "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n"
     },
     { "role": "user", "content": "how is the weather in seattle?" },
     { "role": "assistant", "content": "Of course! " }
@@ -106,6 +106,22 @@ Content-Type: application/json
 }
 
 
+### talk to Arch-Function directly for completion
+POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "Arch-Function",
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n"
+    },
+    { "role": "user", "content": "how is the weather in seattle?" }
+  ]
+}
+
+
 ### talk to guardrails endpoint
 POST {{model_server_endpoint}}/guardrails HTTP/1.1
 Content-Type: application/json