add prefill and test (#236)

* add prefill and test

* fix stream

* fix

* feedback

* address comments

* update

* add e2e test

* fix e2e test

* update fix

* fix

* address cmt

* address cmt
This commit is contained in:
CTran 2024-11-07 11:59:29 -08:00 committed by GitHub
parent f48489f7c0
commit fb67788be0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 216 additions and 19 deletions

View file

@ -6,7 +6,7 @@ import app.prompt_guard.model_utils as guard_utils
from typing import List, Dict
from pydantic import BaseModel
from fastapi import FastAPI, Response, HTTPException
from fastapi import FastAPI, Response, HTTPException, Request
from app.function_calling.model_utils import ChatMessage
from app.commons.constants import embedding_model, zero_shot_model, arch_guard_handler
@ -214,6 +214,11 @@ async def hallucination(req: HallucinationRequest, res: Response):
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
result = await arch_function_chat_completion(req, res)
return result
async def chat_completion(req: ChatMessage, res: Response, request: Request):
try:
result = await arch_function_chat_completion(req, res)
return result
except Exception as e:
logger.error(f"Error in chat_completion: {e}")
res.status_code = 500
return {"error": "Internal server error"}