diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py index 781149d5..73cf4fd7 100644 --- a/model_server/src/core/function_calling.py +++ b/model_server/src/core/function_calling.py @@ -369,15 +369,6 @@ class ArchFunctionHandler(ArchBaseHandler): model_response += chunk.choices[0].delta.content logger.info(f"[Agent Orchestrator]: response received: {model_response}") else: - # ********************************************************************************************* - # TODO: - # Update the following logic for hallucination check - # 1. If the model response starts wtth `tool_calls`, continue halluciantion check: - # - If hallucination detected, start prompt prefilling - # - Otherwise, continue until the end - # 2. Otherwise, stop it - # ********************************************************************************************* - # initialize the hallucination handler, which is an iterator self.hallucination_state = HallucinationState( response_iterator=response, function=req.tools @@ -416,17 +407,6 @@ class ArchFunctionHandler(ArchBaseHandler): model_response = self.default_prefix + "".join( self.hallucination_state.tokens ) - # else: - # # start parameter gathering if the model is not generating tool calls - # prefill_response = self._engage_parameter_gathering(messages) - # model_response = prefill_response.choices[0].message.content - - # # *********************************************************************************************\ - # # TODO: Remove the following for loop after updating hallucination check - # # ********************************************************************************************* - # for chunk in response: - # if len(chunk.choices) > 0 and chunk.choices[0].delta.content: - # model_response += chunk.choices[0].delta.content logger.info(f"[arch-fc]: raw model response: {model_response}") # Extract tool calls from model response diff --git a/model_server/src/main.py b/model_server/src/main.py index ac29a743..e37136cd 100644 --- a/model_server/src/main.py +++ b/model_server/src/main.py @@ -97,9 +97,6 @@ async def function_calling(req: ChatMessage, res: Response): elif final_response.choices[0].message.tool_calls: final_response.metadata["function_latency"] = str(round(latency * 1000, 3)) - # ********************************************************************************************* - # TODO: Put the following code back when hallucination check is ready - # ********************************************************************************************* if not use_agent_orchestrator: final_response.metadata["hallucination"] = str( model_handler.hallucination_state.hallucination @@ -111,9 +108,6 @@ async def function_calling(req: ChatMessage, res: Response): if not use_agent_orchestrator: final_response.metadata["intent_latency"] = str(round(latency * 1000, 3)) - # ********************************************************************************************* - # TODO: Put the following code back when hallucination check is ready - # ********************************************************************************************* final_response.metadata["hallucination"] = str( model_handler.hallucination_state.hallucination )