mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
Collect debugging log
This commit is contained in:
parent
f13947732c
commit
8871d3f751
4 changed files with 92 additions and 14 deletions
|
|
@ -10,22 +10,66 @@ Content-Type: application/json
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "how is the weather in seattle for next 10 days"
|
"content": "what is the weather forcast for seattle in the next 10 days?"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"tools": [
|
"tools": [
|
||||||
{
|
{
|
||||||
"id": "weather-112",
|
"type": "function",
|
||||||
"tool_type": "function",
|
"function": {
|
||||||
"function": {
|
"name": "weather_forecast",
|
||||||
"name": "weather_forecast",
|
"parameters": {
|
||||||
"arguments": {"city": "str", "days": "int"}
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"city": {
|
||||||
|
"type": "str"
|
||||||
|
},
|
||||||
|
"days": {
|
||||||
|
"type": "int"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["city", "days"]
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### talk to function calling endpoint
|
||||||
|
POST {{model_server_endpoint}}/function_calling HTTP/1.1
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "book a hotel for me"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "weather_forecast",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"city": {
|
||||||
|
"type": "str"
|
||||||
|
},
|
||||||
|
"days": {
|
||||||
|
"type": "int"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["city", "days"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
### talk to Arch-Intent directly for completion
|
### talk to Arch-Intent directly for completion
|
||||||
POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1
|
POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1
|
||||||
Content-Type: application/json
|
Content-Type: application/json
|
||||||
|
|
|
||||||
|
|
@ -174,7 +174,7 @@ class ArchFunctionConfig:
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
GENERATION_PARAMS = {
|
GENERATION_PARAMS = {
|
||||||
"temperature": 0.2,
|
"temperature": 0.6,
|
||||||
"top_p": 1.0,
|
"top_p": 1.0,
|
||||||
"top_k": 50,
|
"top_k": 50,
|
||||||
"max_tokens": 512,
|
"max_tokens": 512,
|
||||||
|
|
@ -482,7 +482,7 @@ class ArchFunctionHandler(ArchBaseHandler):
|
||||||
|
|
||||||
for _ in self.hallu_handler:
|
for _ in self.hallu_handler:
|
||||||
# check if the first token is <tool_call>
|
# check if the first token is <tool_call>
|
||||||
if len(self.hallu_handler.tokens) > 0 and has_tool_call == None:
|
if len(self.hallu_handler.tokens) > 0 and has_tool_call is None:
|
||||||
if self.hallu_handler.tokens[0] == "<tool_call>":
|
if self.hallu_handler.tokens[0] == "<tool_call>":
|
||||||
has_tool_call = True
|
has_tool_call = True
|
||||||
else:
|
else:
|
||||||
|
|
@ -490,29 +490,42 @@ class ArchFunctionHandler(ArchBaseHandler):
|
||||||
break
|
break
|
||||||
|
|
||||||
# if the model is hallucinating, start parameter gathering
|
# if the model is hallucinating, start parameter gathering
|
||||||
if self.hallu_handler.hallucination == True:
|
if self.hallu_handler.hallucination is True:
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print(
|
||||||
|
f"Hallucination detected for the following response, start parameter gathering: \n{''.join(self.hallu_handler.tokens)}"
|
||||||
|
)
|
||||||
|
|
||||||
prefill_response = self._engage_parameter_gathering(messages)
|
prefill_response = self._engage_parameter_gathering(messages)
|
||||||
model_response = prefill_response.choices[0].message.content
|
model_response = prefill_response.choices[0].message.content
|
||||||
break
|
break
|
||||||
|
|
||||||
if has_tool_call and self.hallu_handler.hallucination == False:
|
if has_tool_call and self.hallu_handler.hallucination is False:
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print("Tool call found, no hallucination detected!")
|
||||||
model_response = "".join(self.hallu_handler.tokens)
|
model_response = "".join(self.hallu_handler.tokens)
|
||||||
|
|
||||||
# start parameter gathering if the model is not generating tool calls
|
# start parameter gathering if the model is not generating tool calls
|
||||||
if has_tool_call is False:
|
if has_tool_call is False:
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print("No tool call found, start parameter gathering")
|
||||||
prefill_response = self._engage_parameter_gathering(messages)
|
prefill_response = self._engage_parameter_gathering(messages)
|
||||||
model_response = prefill_response.choices[0].message.content
|
model_response = prefill_response.choices[0].message.content
|
||||||
|
|
||||||
# Extract tool calls from model response
|
# Extract tool calls from model response
|
||||||
extracted = self._extract_tool_calls(model_response)
|
extracted = self._extract_tool_calls(model_response)
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print(f"[Extracted] - {extracted}")
|
||||||
|
|
||||||
if extracted["result"]:
|
if len(extracted["result"]) and extracted["status"]:
|
||||||
# [TODO] Review: define the behavior in the case that tool call extraction fails
|
# [TODO] Review: define the behavior in the case that tool call extraction fails
|
||||||
# if not extracted["status"]:
|
# if not extracted["status"]:
|
||||||
|
|
||||||
verified = self._verify_tool_calls(
|
verified = self._verify_tool_calls(
|
||||||
tools=req.tools, tool_calls=extracted["result"]
|
tools=req.tools, tool_calls=extracted["result"]
|
||||||
)
|
)
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print(f"[Verified] - {verified}")
|
||||||
|
|
||||||
# [TODO] Review: In the case that tool calls are invalid, define the protocol to collect debugging output and the behavior to handle it appropriately
|
# [TODO] Review: In the case that tool calls are invalid, define the protocol to collect debugging output and the behavior to handle it appropriately
|
||||||
if verified["status"]:
|
if verified["status"]:
|
||||||
|
|
|
||||||
|
|
@ -278,6 +278,9 @@ class HallucinationStateHandler:
|
||||||
f"Hallucination: token '{self.tokens[-1]}' is uncertain."
|
f"Hallucination: token '{self.tokens[-1]}' is uncertain."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# [TODO] - Review: remove the following code
|
||||||
|
print(f"[Hallucination] - Hallucination detected: {self.error_message}")
|
||||||
|
|
||||||
def _count_consecutive_token(self, token=MaskToken.PARAMETER_VALUE) -> int:
|
def _count_consecutive_token(self, token=MaskToken.PARAMETER_VALUE) -> int:
|
||||||
"""
|
"""
|
||||||
Counts the number of consecutive occurrences of a given token in the mask.
|
Counts the number of consecutive occurrences of a given token in the mask.
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
from src.commons.globals import handler_map
|
from src.commons.globals import handler_map
|
||||||
from src.core.model_utils import ChatMessage, GuardRequest
|
from src.core.model_utils import ChatMessage, GuardRequest
|
||||||
|
|
@ -54,22 +55,34 @@ async def models():
|
||||||
@app.post("/function_calling")
|
@app.post("/function_calling")
|
||||||
async def function_calling(req: ChatMessage, res: Response):
|
async def function_calling(req: ChatMessage, res: Response):
|
||||||
try:
|
try:
|
||||||
|
intent_start_time = time.perf_counter()
|
||||||
intent_response = await handler_map["Arch-Intent"].chat_completion(req)
|
intent_response = await handler_map["Arch-Intent"].chat_completion(req)
|
||||||
|
intent_latency = time.perf_counter() - intent_start_time
|
||||||
|
|
||||||
if handler_map["Arch-Intent"].detect_intent(intent_response):
|
if handler_map["Arch-Intent"].detect_intent(intent_response):
|
||||||
# [TODO] measure agreement between intent detection and function calling
|
# [TODO] measure agreement between intent detection and function calling
|
||||||
try:
|
try:
|
||||||
|
function_start_time = time.perf_counter()
|
||||||
function_calling_response = await handler_map[
|
function_calling_response = await handler_map[
|
||||||
"Arch-Function"
|
"Arch-Function"
|
||||||
].chat_completion(req)
|
].chat_completion(req)
|
||||||
return function_calling_response
|
function_latency = time.perf_counter() - function_start_time
|
||||||
|
return {
|
||||||
|
"response": function_calling_response,
|
||||||
|
"intent_latency": round(intent_latency * 1000, 3),
|
||||||
|
"function_latency": round(function_latency * 1000, 3),
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# [TODO] Review: update how to collect debugging outputs
|
# [TODO] Review: update how to collect debugging outputs
|
||||||
# logger.error(f"Error in chat_completion from `Arch-Function`: {e}")
|
# logger.error(f"Error in chat_completion from `Arch-Function`: {e}")
|
||||||
res.status_code = 500
|
res.status_code = 500
|
||||||
return {"error": f"[Arch-Function] - {e}"}
|
return {"error": f"[Arch-Function] - {e}"}
|
||||||
# [TODO] Review: define the behavior if `Arch-Intent` doesn't detect an intent
|
# [TODO] Review: define the behavior if `Arch-Intent` doesn't detect an intent
|
||||||
# else:
|
else:
|
||||||
|
return {
|
||||||
|
"result": "No intent matched",
|
||||||
|
"intent_latency": round(intent_latency * 1000, 3),
|
||||||
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# [TODO] Review: update how to collect debugging outputs
|
# [TODO] Review: update how to collect debugging outputs
|
||||||
|
|
@ -81,8 +94,13 @@ async def function_calling(req: ChatMessage, res: Response):
|
||||||
@app.post("/guardrails")
|
@app.post("/guardrails")
|
||||||
async def guardrails(req: GuardRequest, res: Response, max_num_words=300):
|
async def guardrails(req: GuardRequest, res: Response, max_num_words=300):
|
||||||
try:
|
try:
|
||||||
|
guard_start_time = time.perf_counter()
|
||||||
guard_result = handler_map["Arch-Guard"].predict(req)
|
guard_result = handler_map["Arch-Guard"].predict(req)
|
||||||
return guard_result
|
guard_latency = time.perf_counter() - guard_start_time
|
||||||
|
return {
|
||||||
|
"response": guard_result,
|
||||||
|
"guard_latency": round(guard_latency * 1000, 3),
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# [TODO] Review: update how to collect debugging outputs
|
# [TODO] Review: update how to collect debugging outputs
|
||||||
res.status_code = 500
|
res.status_code = 500
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue