From 8871d3f751c2d3a6f2c273cb00c21fe373707148 Mon Sep 17 00:00:00 2001 From: Shuguang Chen <54548843+nehcgs@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:15:10 -0800 Subject: [PATCH 1/2] Collect debugging log --- e2e_tests/api_model_server.rest | 56 ++++++++++++++++++++--- model_server/src/core/function_calling.py | 23 ++++++++-- model_server/src/core/hallucination.py | 3 ++ model_server/src/main.py | 24 ++++++++-- 4 files changed, 92 insertions(+), 14 deletions(-) diff --git a/e2e_tests/api_model_server.rest b/e2e_tests/api_model_server.rest index 79a7a0e5..7eeb6849 100644 --- a/e2e_tests/api_model_server.rest +++ b/e2e_tests/api_model_server.rest @@ -10,22 +10,66 @@ Content-Type: application/json "messages": [ { "role": "user", - "content": "how is the weather in seattle for next 10 days" + "content": "what is the weather forcast for seattle in the next 10 days?" } ], "tools": [ { - "id": "weather-112", - "tool_type": "function", - "function": { - "name": "weather_forecast", - "arguments": {"city": "str", "days": "int"} + "type": "function", + "function": { + "name": "weather_forecast", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "str" + }, + "days": { + "type": "int" + } + }, + "required": ["city", "days"] } + } } ] } + +### talk to function calling endpoint +POST {{model_server_endpoint}}/function_calling HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "book a hotel for me" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "weather_forecast", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "str" + }, + "days": { + "type": "int" + } + }, + "required": ["city", "days"] + } + } + } + ] +} + ### talk to Arch-Intent directly for completion POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1 Content-Type: application/json diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py 
index ec21f0f4..27b9cc38 100644 --- a/model_server/src/core/function_calling.py +++ b/model_server/src/core/function_calling.py @@ -174,7 +174,7 @@ class ArchFunctionConfig: ).strip() GENERATION_PARAMS = { - "temperature": 0.2, + "temperature": 0.6, "top_p": 1.0, "top_k": 50, "max_tokens": 512, @@ -482,7 +482,7 @@ class ArchFunctionHandler(ArchBaseHandler): for _ in self.hallu_handler: # check if the first token is - if len(self.hallu_handler.tokens) > 0 and has_tool_call == None: + if len(self.hallu_handler.tokens) > 0 and has_tool_call is None: if self.hallu_handler.tokens[0] == "": has_tool_call = True else: @@ -490,29 +490,42 @@ class ArchFunctionHandler(ArchBaseHandler): break # if the model is hallucinating, start parameter gathering - if self.hallu_handler.hallucination == True: + if self.hallu_handler.hallucination is True: + # [TODO] - Review: remove the following code + print( + f"Hallucination detected for the following response, start parameter gathering: \n{''.join(self.hallu_handler.tokens)}" + ) + prefill_response = self._engage_parameter_gathering(messages) model_response = prefill_response.choices[0].message.content break - if has_tool_call and self.hallu_handler.hallucination == False: + if has_tool_call and self.hallu_handler.hallucination is False: + # [TODO] - Review: remove the following code + print("Tool call found, no hallucination detected!") model_response = "".join(self.hallu_handler.tokens) # start parameter gathering if the model is not generating tool calls if has_tool_call is False: + # [TODO] - Review: remove the following code + print("No tool call found, start parameter gathering") prefill_response = self._engage_parameter_gathering(messages) model_response = prefill_response.choices[0].message.content # Extract tool calls from model response extracted = self._extract_tool_calls(model_response) + # [TODO] - Review: remove the following code + print(f"[Extracted] - {extracted}") - if extracted["result"]: + if 
len(extracted["result"]) and extracted["status"]: # [TODO] Review: define the behavior in the case that tool call extraction fails # if not extracted["status"]: verified = self._verify_tool_calls( tools=req.tools, tool_calls=extracted["result"] ) + # [TODO] - Review: remove the following code + print(f"[Verified] - {verified}") # [TODO] Review: In the case that tool calls are invalid, define the protocol to collect debugging output and the behavior to handle it appropriately if verified["status"]: diff --git a/model_server/src/core/hallucination.py b/model_server/src/core/hallucination.py index 56b84713..3b42a688 100644 --- a/model_server/src/core/hallucination.py +++ b/model_server/src/core/hallucination.py @@ -278,6 +278,9 @@ class HallucinationStateHandler: f"Hallucination: token '{self.tokens[-1]}' is uncertain." ) + # [TODO] - Review: remove the following code + print(f"[Hallucination] - Hallucination detected: {self.error_message}") + def _count_consecutive_token(self, token=MaskToken.PARAMETER_VALUE) -> int: """ Counts the number of consecutive occurrences of a given token in the mask. 
diff --git a/model_server/src/main.py b/model_server/src/main.py index 0ca8c7c7..ef15ff78 100644 --- a/model_server/src/main.py +++ b/model_server/src/main.py @@ -1,4 +1,5 @@ import os +import time from src.commons.globals import handler_map from src.core.model_utils import ChatMessage, GuardRequest @@ -54,22 +55,34 @@ async def models(): @app.post("/function_calling") async def function_calling(req: ChatMessage, res: Response): try: + intent_start_time = time.perf_counter() intent_response = await handler_map["Arch-Intent"].chat_completion(req) + intent_latency = time.perf_counter() - intent_start_time if handler_map["Arch-Intent"].detect_intent(intent_response): # [TODO] measure agreement between intent detection and function calling try: + function_start_time = time.perf_counter() function_calling_response = await handler_map[ "Arch-Function" ].chat_completion(req) - return function_calling_response + function_latency = time.perf_counter() - function_start_time + return { + "response": function_calling_response, + "intent_latency": round(intent_latency * 1000, 3), + "function_latency": round(function_latency * 1000, 3), + } except Exception as e: # [TODO] Review: update how to collect debugging outputs # logger.error(f"Error in chat_completion from `Arch-Function`: {e}") res.status_code = 500 return {"error": f"[Arch-Function] - {e}"} # [TODO] Review: define the behavior if `Arch-Intent` doesn't detect an intent - # else: + else: + return { + "result": "No intent matched", + "intent_latency": round(intent_latency * 1000, 3), + } except Exception as e: # [TODO] Review: update how to collect debugging outputs @@ -81,8 +94,13 @@ async def function_calling(req: ChatMessage, res: Response): @app.post("/guardrails") async def guardrails(req: GuardRequest, res: Response, max_num_words=300): try: + guard_start_time = time.perf_counter() guard_result = handler_map["Arch-Guard"].predict(req) - return guard_result + guard_latency = time.perf_counter() - guard_start_time + 
return { + "response": guard_result, + "guard_latency": round(guard_latency * 1000, 3), + } except Exception as e: # [TODO] Review: update how to collect debugging outputs res.status_code = 500 From 1635d44e4afacd488e8dd144293c93c3e9e76e02 Mon Sep 17 00:00:00 2001 From: Shuguang Chen <54548843+nehcgs@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:35:03 -0800 Subject: [PATCH 2/2] Update api_model_server.rest --- e2e_tests/api_model_server.rest | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/e2e_tests/api_model_server.rest b/e2e_tests/api_model_server.rest index 7eeb6849..e6ad1530 100644 --- a/e2e_tests/api_model_server.rest +++ b/e2e_tests/api_model_server.rest @@ -79,7 +79,7 @@ Content-Type: application/json "messages": [ { "role": "system", - "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"description\": \"Get the current weather for a location\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"str\", \"description\": \"The city and state, e.g. 
San Francisco, New York\"}, \"unit\": {\"type\": \"str\", \"enum\": [\"celsius\", \"fahrenheit\"], \"description\": \"The unit of temperature to return\"}}, \"required\": [\"location\"]}}}\n\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n" + "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n" }, { "role": "user", "content": "how is the weather in seattle? Are there any tools can help?" 
} ], @@ -96,7 +96,7 @@ Content-Type: application/json "messages": [ { "role": "system", - "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{\"id\": \"weather-112\", \"tool_type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"arguments\": {\"city\": \"str\", \"days\": \"int\"}}}\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n" + "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{\"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n\n" }, { "role": "user", "content": "how is the weather in seattle?" }, { "role": "assistant", "content": "Of course! 
" } @@ -106,6 +106,22 @@ Content-Type: application/json } +### talk to Arch-Function directly for completion +POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "Arch-Function", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{\"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n\n" + }, + { "role": "user", "content": "how is the weather in seattle?" } + ] +} + + ### talk to guardrails endpoint POST {{model_server_endpoint}}/guardrails HTTP/1.1 Content-Type: application/json