Handle intent matching better in arch gateway (#391)

2026-06-11 15:05:14 +02:00 · 2025-03-04 12:49:13 -08:00 · 2025-03-04 12:49:13 -08:00 · e77fc47225
commit e77fc47225
parent 10cad4d0b7
10 changed files with 653 additions and 309 deletions
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@ -355,7 +355,7 @@ class ArchFunctionHandler(ArchBaseHandler):
                        try:
                            tool_content = json.loads(fixed_content)
                        except Exception:
-                            tool_calls, is_valid, error_message = [], False, e
+                            is_valid, error_message = False, e
                            break

                    tool_calls.append(
@ -573,23 +573,26 @@ class ArchFunctionHandler(ArchBaseHandler):
        # Extract tool calls from model response
        extracted = self._extract_tool_calls(model_response)

-        if len(extracted["result"]) and extracted["status"]:
-            verified = self._verify_tool_calls(
-                tools=req.tools, tool_calls=extracted["result"]
-            )
-
-            if verified["status"]:
-                logger.info(
-                    f"[Tool calls]: {json.dumps([tool_call['function'] for tool_call in extracted['result']])}"
+        if extracted["status"]:
+            # Response with tool calls
+            if len(extracted["result"]):
+                verified = self._verify_tool_calls(
+                    tools=req.tools, tool_calls=extracted["result"]
                )
-                model_response = Message(content="", tool_calls=extracted["result"])
+
+                if verified["status"]:
+                    logger.info(
+                        f"[Tool calls]: {json.dumps([tool_call['function'] for tool_call in extracted['result']])}"
+                    )
+                    model_response = Message(content="", tool_calls=extracted["result"])
+                else:
+                    logger.error(f"Invalid tool call - {verified['message']}")
+            # Response without tool calls
            else:
-                logger.error(f"Invalid tool call - {verified['message']}")
-                # raise ValueError(
-                #     f"[Arch-Function]: Invalid tool call - {verified['message']}"
-                # )
+                model_response = Message(content=model_response, tool_calls=[])
+        # Response with tool calls but contain errors
        else:
-            model_response = Message(content=model_response, tool_calls=[])
+            logger.error(f"Tool call extraction error - {extracted['message']}")

        chat_completion_response = ChatCompletionResponse(
            choices=[Choice(message=model_response)], model=self.model_name
--- a/model_server/src/core/utils/model_utils.py
+++ b/model_server/src/core/utils/model_utils.py
@ -171,7 +171,7 @@ class ArchBaseHandler:
        assert processed_messages[-1]["role"] == "user"

        if extra_instruction:
-            processed_messages[-1]["content"] += extra_instruction
+            processed_messages[-1]["content"] += "\n" + extra_instruction

        # keep the first system message and shift conversation if the total token length exceeds the limit
        def truncate_messages(messages: List[Dict[str, Any]]):
--- a/model_server/src/main.py
+++ b/model_server/src/main.py
@ -104,6 +104,7 @@ async def function_calling(req: ChatMessage, res: Response):
                res.status_code = 500
                error_messages = f"[Arch-Function] - Error in ChatCompletion: {e}"
        else:
+            # no intent matched
            intent_response.metadata = {
                "intent_latency": str(round(intent_latency * 1000, 3)),
            }