mirror of
https://github.com/katanemo/plano.git
synced 2026-05-02 04:12:56 +02:00
Refactor model server hardware config + add unit tests to load/request to the server (#189)
* remove mode/hardware * add test and pre commit hook * add pytest dependencies * fix format * fix lint * fix precommit * fix pre commit * fix pre commit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit * fix precommit
This commit is contained in:
parent
3bd2ffe9fb
commit
8e54ac20d8
13 changed files with 480 additions and 43 deletions
|
|
@ -13,6 +13,7 @@ from app.commons.constants import embedding_model, zero_shot_model, arch_guard_h
|
|||
from app.function_calling.model_utils import (
|
||||
chat_completion as arch_function_chat_completion,
|
||||
)
|
||||
from unittest.mock import patch
|
||||
|
||||
logger = utils.get_model_server_logger()
|
||||
|
||||
|
|
@ -173,6 +174,7 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
|
|||
|
||||
|
||||
@app.post("/hallucination")
|
||||
@patch("app.loader.glb.DEVICE", "cpu") # Mock the device to 'cpu'
|
||||
async def hallucination(req: HallucinationRequest, res: Response):
|
||||
"""
|
||||
Take input as text and return the prediction of hallucination for each parameter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue