mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
send latency numbers from model_server as metadata
This commit is contained in:
parent
14625e2a1d
commit
60dfb18018
4 changed files with 9 additions and 6 deletions
|
|
@ -30,6 +30,7 @@ class ChatCompletionResponse(BaseModel):
|
|||
created: Optional[str] = ""
|
||||
choices: List[Choice]
|
||||
model: str
|
||||
metadata: Optional[Dict[str, str]] = {}
|
||||
|
||||
|
||||
class GuardRequest(BaseModel):
|
||||
|
|
|
|||
|
|
@ -67,11 +67,12 @@ async def function_calling(req: ChatMessage, res: Response):
|
|||
"Arch-Function"
|
||||
].chat_completion(req)
|
||||
function_latency = time.perf_counter() - function_start_time
|
||||
return {
|
||||
"response": function_calling_response,
|
||||
"intent_latency": round(intent_latency * 1000, 3),
|
||||
"function_latency": round(function_latency * 1000, 3),
|
||||
function_calling_response.metadata = {
|
||||
"intent_latency": str(round(intent_latency * 1000, 3)),
|
||||
"function_latency": str(round(function_latency * 1000, 3)),
|
||||
}
|
||||
|
||||
return function_calling_response
|
||||
except Exception as e:
|
||||
# [TODO] Review: update how to collect debugging outputs
|
||||
# logger.error(f"Error in chat_completion from `Arch-Function`: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue