mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
add e2e test
This commit is contained in:
parent
0d9cbdebda
commit
e74a3e1e38
5 changed files with 61 additions and 4 deletions
|
|
@ -133,6 +133,7 @@ async def chat_completion(
|
|||
if hasattr(token.choices[0].delta, "content"):
|
||||
full_response += token.choices[0].delta.content
|
||||
else:
|
||||
logger.info("Stream is disabled, not engaging pre-filling")
|
||||
full_response = resp.choices[0].message.content
|
||||
|
||||
tool_calls = const.arch_function_hanlder.extract_tool_calls(full_response)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import app.prompt_guard.model_utils as guard_utils
|
|||
|
||||
from typing import List, Dict
|
||||
from pydantic import BaseModel
|
||||
from fastapi import FastAPI, Response, HTTPException
|
||||
from fastapi import FastAPI, Response, HTTPException, Request
|
||||
from app.function_calling.model_utils import ChatMessage
|
||||
|
||||
from app.commons.constants import embedding_model, zero_shot_model, arch_guard_handler
|
||||
|
|
@ -214,9 +214,12 @@ async def hallucination(req: HallucinationRequest, res: Response):
|
|||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completion(req: ChatMessage, res: Response):
|
||||
async def chat_completion(req: ChatMessage, res: Response, request: Request):
|
||||
try:
|
||||
result = await arch_function_chat_completion(req, res)
|
||||
prefill_enabled = (
|
||||
request.query_params.get("prefill_enabled", "true").lower() == "true"
|
||||
)
|
||||
result = await arch_function_chat_completion(req, res, prefill_enabled)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Error in chat_completion: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue