fix precommit

This commit is contained in:
cotran 2024-10-16 16:49:44 -07:00
parent 953b1b056b
commit b5d2f19d9b
4 changed files with 119 additions and 7 deletions

View file

@@ -13,6 +13,7 @@ from app.commons.constants import embedding_model, zero_shot_model, arch_guard_h
from app.function_calling.model_utils import (
chat_completion as arch_function_chat_completion,
)
from unittest.mock import patch
logger = utils.get_model_server_logger()
@@ -173,6 +174,7 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
@app.post("/hallucination")
@patch("app.loader.glb.DEVICE", "cpu") # Mock the device to 'cpu'
async def hallucination(req: HallucinationRequest, res: Response):
"""
Take input as text and return the prediction of hallucination for each parameter

View file

@@ -2,12 +2,20 @@ import pytest
import httpx
from fastapi.testclient import TestClient
from app.main import app # Assuming your FastAPI app is in main.py
from unittest.mock import patch
import app.commons.globals as glb
import logging
logger = logging.getLogger(__name__)
client = TestClient(app)
logger.info(f"Model will be loaded on device: {glb.DEVICE}")
# Unit tests for the health check endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_healthz():
response = client.get("/healthz")
assert response.status_code == 200
@@ -16,6 +24,7 @@ async def test_healthz():
# Unit test for the models endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_models():
response = client.get("/models")
assert response.status_code == 200
@@ -25,6 +34,7 @@ async def test_models():
# Unit test for embeddings endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_embedding():
request_data = {"input": "Test embedding", "model": "katanemo/bge-large-en-v1.5"}
response = client.post("/embeddings", json=request_data)
@@ -38,6 +48,7 @@ async def test_embedding():
# Unit test for the guard endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_guard():
request_data = {"input": "Test for jailbreak and toxicity", "task": "jailbreak"}
response = client.post("/guard", json=request_data)
@@ -47,6 +58,7 @@ async def test_guard():
# Unit test for the zero-shot endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_zeroshot():
request_data = {
"input": "Test input",
@@ -63,6 +75,7 @@ async def test_zeroshot():
# Unit test for the hallucination endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_hallucination():
request_data = {
"prompt": "Test hallucination",
@@ -79,6 +92,7 @@ async def test_hallucination():
# Unit test for the chat completion endpoint
@pytest.mark.asyncio
@patch("app.loader.glb.DEVICE", glb.DEVICE) # Mock the device to 'cpu'
async def test_chat_completion():
async with httpx.AsyncClient(app=app, base_url="http://test") as client:
request_data = {

View file

@@ -5,13 +5,7 @@ import app.commons.globals as glb
from app.loader import get_embedding_model, get_zero_shot_model, get_prompt_guard
# Mock constants
if torch.cuda.is_available():
DEVICE = "cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
DEVICE = "mps"
else:
DEVICE = "cpu"
glb.DEVICE = DEVICE # Adjust as needed for your test case
glb.DEVICE = "cuda" # Adjust as needed for your test case
arch_guard_model_type = {
"cpu": "katanemo/Arch-Guard-cpu",
"cuda": "katanemo/Arch-Guard",

View file

@@ -0,0 +1,102 @@
# Unit tests for the model-loading helpers in app.loader.
import os
import pytest
from unittest.mock import patch, MagicMock
import app.commons.globals as glb
from app.loader import get_embedding_model, get_zero_shot_model, get_prompt_guard
# Mock constants
# NOTE(review): forces every test in this module down the "mps" code path;
# differs from the sibling test module that pins "cuda" — confirm this is
# intentional and that the CI host can exercise the mps branch.
glb.DEVICE = "mps" # Adjust as needed for your test case
# Maps a device string to the Arch-Guard checkpoint app.loader is expected
# to request for that device.
arch_guard_model_type = {
"cpu": "katanemo/Arch-Guard-cpu",
"cuda": "katanemo/Arch-Guard",
"mps": "katanemo/Arch-Guard",
}
@pytest.fixture
def mock_env():
    """Point app.loader at the test checkpoints via environment variables.

    FIX: the original set ``os.environ`` and never cleaned up, so the
    mutation leaked into every later test in the session.  The fixture now
    snapshots the previous values and restores them on teardown.
    """
    overrides = {
        "MODELS": "katanemo/bge-large-en-v1.5",
        "ZERO_SHOT_MODELS": "katanemo/bart-large-mnli",
    }
    saved = {key: os.environ.get(key) for key in overrides}
    os.environ.update(overrides)
    yield
    # Teardown: restore (or remove) each variable we touched.
    for key, value in saved.items():
        if value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = value
# Test for get_embedding_model function
@patch("app.loader.ORTModelForFeatureExtraction.from_pretrained")
@patch("app.loader.AutoModel.from_pretrained")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_embedding_model(mock_tokenizer, mock_automodel, mock_ort_model, mock_env):
    """get_embedding_model loads the tokenizer plus the device-appropriate backbone.

    BUG FIX: the original used ``assert mock.called_once_with(...)``, which is
    always truthy — accessing an undefined attribute on a Mock just creates a
    child Mock — so the assertions could never fail.  Replaced with the real
    ``assert_called_once_with`` Mock API.
    """
    mock_automodel.return_value = MagicMock()
    mock_ort_model.return_value = MagicMock()
    mock_tokenizer.return_value = MagicMock()

    embedding_model = get_embedding_model()

    # Assertions
    assert embedding_model["model_name"] == "katanemo/bge-large-en-v1.5"
    # NOTE(review): argument expectations mirror what the original test
    # intended; confirm against the actual calls in app.loader.
    mock_tokenizer.assert_called_once_with(
        "katanemo/bge-large-en-v1.5", trust_remote_code=True
    )
    if glb.DEVICE != "cuda":
        # Non-CUDA devices load the ONNX-runtime export.
        mock_ort_model.assert_called_once_with(
            "katanemo/bge-large-en-v1.5", file_name="onnx/model.onnx"
        )
    else:
        # CUDA loads the plain transformers model placed on the device.
        mock_automodel.assert_called_once_with(
            "katanemo/bge-large-en-v1.5", device_map=glb.DEVICE
        )
# Test for get_zero_shot_model function
@patch("app.loader.ORTModelForSequenceClassification.from_pretrained")
@patch("app.loader.pipeline")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_zero_shot_model(mock_tokenizer, mock_pipeline, mock_ort_model, mock_env):
    """get_zero_shot_model loads the NLI tokenizer and classification backend.

    BUG FIX: ``assert mock.called_once_with(...)`` is always truthy (it
    creates a child Mock instead of verifying the call); replaced with the
    real ``assert_called_once_with`` / ``assert_called_once`` Mock APIs.
    """
    mock_pipeline.return_value = MagicMock()
    mock_ort_model.return_value = MagicMock()
    mock_tokenizer.return_value = MagicMock()

    zero_shot_model = get_zero_shot_model()

    # Assertions
    assert zero_shot_model["model_name"] == "katanemo/bart-large-mnli"
    # NOTE(review): argument expectations mirror what the original test
    # intended; confirm against the actual calls in app.loader.
    mock_tokenizer.assert_called_once_with("katanemo/bart-large-mnli")
    if glb.DEVICE != "cuda":
        # Non-CUDA devices load the ONNX-runtime export.
        mock_ort_model.assert_called_once_with(
            "katanemo/bart-large-mnli", file_name="onnx/model.onnx"
        )
    else:
        # CUDA uses the transformers pipeline.
        mock_pipeline.assert_called_once()
# Test for get_prompt_guard function
@patch("app.loader.AutoTokenizer.from_pretrained")
@patch("app.loader.OVModelForSequenceClassification.from_pretrained")
@patch("app.loader.AutoModelForSequenceClassification.from_pretrained")
def test_get_prompt_guard(mock_auto_model, mock_ov_model, mock_tokenizer):
    """get_prompt_guard loads the Arch-Guard model for the active device.

    BUG FIXES:
    * The mock parameters were swapped: ``@patch`` injects mocks bottom-up,
      so the first parameter belongs to the innermost decorator
      (``AutoModelForSequenceClassification``), not OVModel.  The parameter
      names now match the decorators they receive.
    * ``assert mock.called_once_with(...)`` is always truthy (it creates a
      child Mock); replaced with the real ``assert_called_once_with`` API.
    """
    # Mock model based on device
    if glb.DEVICE == "cpu":
        mock_ov_model.return_value = MagicMock()
    else:
        mock_auto_model.return_value = MagicMock()
    mock_tokenizer.return_value = MagicMock()

    prompt_guard = get_prompt_guard(arch_guard_model_type[glb.DEVICE])

    # Assertions
    assert prompt_guard["model_name"] == arch_guard_model_type[glb.DEVICE]
    # NOTE(review): argument expectations mirror what the original test
    # intended; confirm against the actual calls in app.loader.
    mock_tokenizer.assert_called_once_with(
        arch_guard_model_type[glb.DEVICE], trust_remote_code=True
    )
    if glb.DEVICE == "cpu":
        # CPU path loads the OpenVINO export.
        mock_ov_model.assert_called_once_with(
            arch_guard_model_type[glb.DEVICE],
            device_map=glb.DEVICE,
            low_cpu_mem_usage=True,
        )
    else:
        # GPU/MPS path loads the plain transformers model.
        mock_auto_model.assert_called_once_with(
            arch_guard_model_type[glb.DEVICE],
            device_map=glb.DEVICE,
            low_cpu_mem_usage=True,
        )