mirror of
https://github.com/katanemo/plano.git
synced 2026-06-14 15:15:15 +02:00
add test and pre commit hook
This commit is contained in:
parent
cf612409f0
commit
ee36e403d1
9 changed files with 351 additions and 7 deletions
|
|
@ -25,6 +25,11 @@ repos:
|
|||
# --lib is to only test the library, since when integration tests are made,
|
||||
# they will be in a separate tests directory
|
||||
entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib"
|
||||
- id: python-tests
|
||||
name: Run Python Tests with pytest
|
||||
language: system
|
||||
entry: bash -c "cd model_server && pytest --maxfail=5 --disable-warnings"
|
||||
types: [python]
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.1.0
|
||||
hooks:
|
||||
|
|
|
|||
|
|
@ -28,6 +28,6 @@ arch_guard_model_type = {
|
|||
embedding_model = loader.get_embedding_model()
|
||||
zero_shot_model = loader.get_zero_shot_model()
|
||||
|
||||
prompt_guard_dict = loader.get_prompt_guard(arch_guard_model_type[glb.device])
|
||||
prompt_guard_dict = loader.get_prompt_guard(arch_guard_model_type[glb.DEVICE])
|
||||
|
||||
arch_guard_handler = ArchGuardHanlder(model_dict=prompt_guard_dict)
|
||||
|
|
|
|||
|
|
@ -2,4 +2,3 @@ import app.commons.utilities as utils
|
|||
|
||||
|
||||
DEVICE = utils.get_device()
|
||||
MODE = utils.get_serving_mode()
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ from optimum.onnxruntime import (
|
|||
ORTModelForSequenceClassification,
|
||||
)
|
||||
import app.commons.utilities as utils
|
||||
import torch
|
||||
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
||||
from optimum.intel import OVModelForSequenceClassification
|
||||
|
||||
|
||||
logger = utils.get_model_server_logger()
|
||||
|
||||
|
|
@ -64,12 +68,9 @@ def get_prompt_guard(model_name):
|
|||
logger.info("Loading Guard Model...")
|
||||
|
||||
if glb.DEVICE == "cpu":
|
||||
from optimum.intel import OVModelForSequenceClassification
|
||||
|
||||
model_class = OVModelForSequenceClassification
|
||||
elif glb.DEVICE == "gpu":
|
||||
import torch
|
||||
from transformers import AutoModelForSequenceClassification
|
||||
else:
|
||||
|
||||
model_class = AutoModelForSequenceClassification
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ class ArchGuardHanlder:
|
|||
self.model = model_dict["model"]
|
||||
self.tokenizer = model_dict["tokenizer"]
|
||||
self.device = model_dict["device"]
|
||||
self.hardware_config = model_dict["hardware_config"]
|
||||
|
||||
self.threshold = threshold
|
||||
|
||||
|
|
|
|||
91
model_server/app/tests/test_app.py
Normal file
91
model_server/app/tests/test_app.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
import pytest
|
||||
import httpx
|
||||
from fastapi.testclient import TestClient
|
||||
from app.main import app # Assuming your FastAPI app is in main.py
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
# Unit tests for the health check endpoint
|
||||
def test_healthz():
    """Verify that ``GET /healthz`` answers 200 with ``{"status": "ok"}``.

    ``TestClient`` is a synchronous client, so this is a plain test function:
    no event loop (and no pytest-asyncio marker) is needed to drive it.
    """
    response = client.get("/healthz")

    assert response.status_code == 200
    assert response.json() == {"status": "ok"}
|
||||
|
||||
# Unit test for the models endpoint
|
||||
def test_models():
    """Verify that ``GET /models`` returns a non-empty model listing.

    The test is synchronous because ``TestClient`` itself is synchronous.
    """
    response = client.get("/models")

    assert response.status_code == 200

    # Parse the body once instead of re-decoding it for every assertion.
    payload = response.json()
    assert payload["object"] == "list"
    assert len(payload["data"]) > 0
|
||||
|
||||
# Unit test for embeddings endpoint
|
||||
def test_embedding():
    """Verify that ``POST /embeddings`` accepts the bge embedding model.

    The request always names ``katanemo/bge-large-en-v1.5``, so only the
    success path is asserted; the former ``else`` branch (expecting a 400)
    compared a literal with itself and could never run.
    """
    request_data = {
        "input": "Test embedding",
        "model": "katanemo/bge-large-en-v1.5",
    }

    response = client.post("/embeddings", json=request_data)

    assert response.status_code == 200
    payload = response.json()
    assert payload["object"] == "list"
    assert "data" in payload
|
||||
|
||||
# Unit test for the guard endpoint
|
||||
def test_guard():
    """Verify that ``POST /guard`` returns a jailbreak verdict for a jailbreak task.

    Written as a synchronous test because ``TestClient`` is synchronous.
    """
    request_data = {
        "input": "Test for jailbreak and toxicity",
        "task": "jailbreak",
    }

    response = client.post("/guard", json=request_data)

    assert response.status_code == 200
    assert "jailbreak_verdict" in response.json()
|
||||
|
||||
# Unit test for the zero-shot endpoint
|
||||
def test_zeroshot():
    """Verify that ``POST /zeroshot`` returns a predicted class.

    The request always names ``katanemo/bart-large-mnli``, so only the
    success path is asserted; the former 400 branch was unreachable.
    """
    request_data = {
        "input": "Test input",
        "labels": ["label1", "label2"],
        "model": "katanemo/bart-large-mnli",
    }

    response = client.post("/zeroshot", json=request_data)

    assert response.status_code == 200
    assert "predicted_class" in response.json()
|
||||
|
||||
# Unit test for the hallucination endpoint
|
||||
def test_hallucination():
    """Verify that ``POST /hallucination`` returns per-parameter scores.

    The request always names ``katanemo/bart-large-mnli``, so only the
    success path is asserted; the former 400 branch was unreachable.
    """
    request_data = {
        "prompt": "Test hallucination",
        "parameters": {"param1": "value1"},
        "model": "katanemo/bart-large-mnli",
    }

    response = client.post("/hallucination", json=request_data)

    assert response.status_code == 200
    assert "params_scores" in response.json()
|
||||
|
||||
# Unit test for the chat completion endpoint
|
||||
@pytest.mark.asyncio
async def test_chat_completion():
    """Verify that ``POST /v1/chat/completions`` returns a ``choices`` field.

    The app is driven in-process through ``httpx.ASGITransport``. The older
    ``AsyncClient(app=...)`` shortcut is deprecated in recent httpx releases,
    so the transport is passed explicitly.
    """
    request_data = {
        "messages": [{"role": "user", "content": "Hello!"}],
        "model": "Arch-Function-1.5B",
        # NOTE(review): "tools" and "metadata" are assumed to be part of the
        # request schema -- confirm against the endpoint's request model.
        "tools": [],
        "metadata": {"x-arch-state": "[]"},
    }

    transport = httpx.ASGITransport(app=app)
    # Named ``async_client`` so it does not shadow the module-level ``client``.
    async with httpx.AsyncClient(
        transport=transport, base_url="http://test"
    ) as async_client:
        response = await async_client.post(
            "/v1/chat/completions", json=request_data
        )

    assert response.status_code == 200
    assert "choices" in response.json()
|
||||
83
model_server/app/tests/test_loaders_cpu.py
Normal file
83
model_server/app/tests/test_loaders_cpu.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import app.commons.globals as glb
|
||||
from app.loader import get_embedding_model, get_zero_shot_model, get_prompt_guard
|
||||
|
||||
# Mock constants
|
||||
glb.DEVICE = "cpu" # Adjust as needed for your test case
|
||||
arch_guard_model_type = {
|
||||
"cpu": "katanemo/Arch-Guard-cpu",
|
||||
"cuda": "katanemo/Arch-Guard",
|
||||
"mps": "katanemo/Arch-Guard",
|
||||
}
|
||||
@pytest.fixture
def mock_env(monkeypatch):
    """Set the model-selection environment variables for a single test.

    ``monkeypatch.setenv`` restores the previous values when the test ends,
    so the settings do not leak into other tests the way a direct
    ``os.environ`` assignment would.
    """
    monkeypatch.setenv("MODELS", "katanemo/bge-large-en-v1.5")
    monkeypatch.setenv("ZERO_SHOT_MODELS", "katanemo/bart-large-mnli")
|
||||
|
||||
# Test for get_embedding_model function
|
||||
@patch.object(glb, "DEVICE", "cpu")
@patch("app.loader.ORTModelForFeatureExtraction.from_pretrained")
@patch("app.loader.AutoModel.from_pretrained")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_embedding_model(mock_tokenizer, mock_automodel, mock_ort_model, mock_env):
    """Check which loaders ``get_embedding_model`` calls on the CPU device.

    ``glb.DEVICE`` is pinned with ``patch.object`` for the duration of the
    test: every test module assigns ``glb.DEVICE`` at import time, so without
    the pin the value seen here is whatever the last-imported module set.

    The checks use ``assert_called_once_with``. The previous
    ``assert mock.called_once_with(...)`` form only read an auto-created mock
    attribute and therefore could never fail.
    """
    model_name = "katanemo/bge-large-en-v1.5"

    embedding_model = get_embedding_model()

    assert embedding_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # Any non-CUDA device is expected to use the ONNX runtime model.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_ort_model.assert_called_once_with(model_name, file_name="onnx/model.onnx")
|
||||
|
||||
# Test for get_zero_shot_model function
|
||||
@patch.object(glb, "DEVICE", "cpu")
@patch("app.loader.ORTModelForSequenceClassification.from_pretrained")
@patch("app.loader.pipeline")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_zero_shot_model(mock_tokenizer, mock_pipeline, mock_ort_model, mock_env):
    """Check which loaders ``get_zero_shot_model`` calls on the CPU device.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.

    Real ``assert_called_once_with`` checks replace the previous
    ``assert mock.called_once_with(...)`` expressions, which were always truthy.
    """
    model_name = "katanemo/bart-large-mnli"

    zero_shot_model = get_zero_shot_model()

    assert zero_shot_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name)
    # Any non-CUDA device is expected to use the ONNX runtime model.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_ort_model.assert_called_once_with(model_name, file_name="onnx/model.onnx")
|
||||
|
||||
# Test for get_prompt_guard function
|
||||
@patch.object(glb, "DEVICE", "cpu")
@patch("app.loader.AutoTokenizer.from_pretrained")
@patch("app.loader.OVModelForSequenceClassification.from_pretrained")
@patch("app.loader.AutoModelForSequenceClassification.from_pretrained")
def test_get_prompt_guard(mock_auto_model, mock_ov_model, mock_tokenizer):
    """Check which loaders ``get_prompt_guard`` calls on the CPU device.

    Stacked ``@patch`` decorators inject mocks bottom-up, so the parameter
    order is: AutoModelForSequenceClassification, OVModelForSequenceClassification,
    AutoTokenizer. The previous signature had the first two names swapped.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.
    """
    model_name = arch_guard_model_type[glb.DEVICE]

    prompt_guard = get_prompt_guard(model_name)

    assert prompt_guard["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # On CPU the OpenVINO model class is the one expected to be loaded.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_ov_model.assert_called_once_with(
        model_name, device_map=glb.DEVICE, low_cpu_mem_usage=True
    )
|
||||
83
model_server/app/tests/test_loaders_gpu.py
Normal file
83
model_server/app/tests/test_loaders_gpu.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import app.commons.globals as glb
|
||||
from app.loader import get_embedding_model, get_zero_shot_model, get_prompt_guard
|
||||
|
||||
# Mock constants
|
||||
glb.DEVICE = "cuda" # Adjust as needed for your test case
|
||||
arch_guard_model_type = {
|
||||
"cpu": "katanemo/Arch-Guard-cpu",
|
||||
"cuda": "katanemo/Arch-Guard",
|
||||
"mps": "katanemo/Arch-Guard",
|
||||
}
|
||||
@pytest.fixture
def mock_env(monkeypatch):
    """Set the model-selection environment variables for a single test.

    ``monkeypatch.setenv`` restores the previous values when the test ends,
    so the settings do not leak into other tests the way a direct
    ``os.environ`` assignment would.
    """
    monkeypatch.setenv("MODELS", "katanemo/bge-large-en-v1.5")
    monkeypatch.setenv("ZERO_SHOT_MODELS", "katanemo/bart-large-mnli")
|
||||
|
||||
# Test for get_embedding_model function
|
||||
@patch.object(glb, "DEVICE", "cuda")
@patch("app.loader.ORTModelForFeatureExtraction.from_pretrained")
@patch("app.loader.AutoModel.from_pretrained")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_embedding_model(mock_tokenizer, mock_automodel, mock_ort_model, mock_env):
    """Check which loaders ``get_embedding_model`` calls on the CUDA device.

    ``glb.DEVICE`` is pinned with ``patch.object`` for the duration of the
    test: every test module assigns ``glb.DEVICE`` at import time, so without
    the pin the value seen here is whatever the last-imported module set.

    The checks use ``assert_called_once_with``. The previous
    ``assert mock.called_once_with(...)`` form only read an auto-created mock
    attribute and therefore could never fail.
    """
    model_name = "katanemo/bge-large-en-v1.5"

    embedding_model = get_embedding_model()

    assert embedding_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # On CUDA the plain transformers model is expected instead of ONNX.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_automodel.assert_called_once_with(model_name, device_map=glb.DEVICE)
|
||||
|
||||
# Test for get_zero_shot_model function
|
||||
@patch.object(glb, "DEVICE", "cuda")
@patch("app.loader.ORTModelForSequenceClassification.from_pretrained")
@patch("app.loader.pipeline")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_zero_shot_model(mock_tokenizer, mock_pipeline, mock_ort_model, mock_env):
    """Check which loaders ``get_zero_shot_model`` calls on the CUDA device.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.

    Real ``assert_called_once*`` checks replace the previous
    ``assert mock.called_once...(...)`` expressions, which were always truthy.
    """
    model_name = "katanemo/bart-large-mnli"

    zero_shot_model = get_zero_shot_model()

    assert zero_shot_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name)
    # On CUDA the original test only required that the pipeline was built.
    mock_pipeline.assert_called_once()
|
||||
|
||||
# Test for get_prompt_guard function
|
||||
@patch.object(glb, "DEVICE", "cuda")
@patch("app.loader.AutoTokenizer.from_pretrained")
@patch("app.loader.OVModelForSequenceClassification.from_pretrained")
@patch("app.loader.AutoModelForSequenceClassification.from_pretrained")
def test_get_prompt_guard(mock_auto_model, mock_ov_model, mock_tokenizer):
    """Check which loaders ``get_prompt_guard`` calls on the CUDA device.

    Stacked ``@patch`` decorators inject mocks bottom-up, so the parameter
    order is: AutoModelForSequenceClassification, OVModelForSequenceClassification,
    AutoTokenizer. The previous signature had the first two names swapped.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.
    """
    model_name = arch_guard_model_type[glb.DEVICE]

    prompt_guard = get_prompt_guard(model_name)

    assert prompt_guard["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # Off CPU the plain transformers model class is the one expected.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_auto_model.assert_called_once_with(
        model_name, device_map=glb.DEVICE, low_cpu_mem_usage=True
    )
|
||||
83
model_server/app/tests/test_loaders_mps.py
Normal file
83
model_server/app/tests/test_loaders_mps.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import app.commons.globals as glb
|
||||
from app.loader import get_embedding_model, get_zero_shot_model, get_prompt_guard
|
||||
|
||||
# Mock constants
|
||||
glb.DEVICE = "mps" # Adjust as needed for your test case
|
||||
arch_guard_model_type = {
|
||||
"cpu": "katanemo/Arch-Guard-cpu",
|
||||
"cuda": "katanemo/Arch-Guard",
|
||||
"mps": "katanemo/Arch-Guard",
|
||||
}
|
||||
@pytest.fixture
def mock_env(monkeypatch):
    """Set the model-selection environment variables for a single test.

    ``monkeypatch.setenv`` restores the previous values when the test ends,
    so the settings do not leak into other tests the way a direct
    ``os.environ`` assignment would.
    """
    monkeypatch.setenv("MODELS", "katanemo/bge-large-en-v1.5")
    monkeypatch.setenv("ZERO_SHOT_MODELS", "katanemo/bart-large-mnli")
|
||||
|
||||
# Test for get_embedding_model function
|
||||
@patch.object(glb, "DEVICE", "mps")
@patch("app.loader.ORTModelForFeatureExtraction.from_pretrained")
@patch("app.loader.AutoModel.from_pretrained")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_embedding_model(mock_tokenizer, mock_automodel, mock_ort_model, mock_env):
    """Check which loaders ``get_embedding_model`` calls on the MPS device.

    ``glb.DEVICE`` is pinned with ``patch.object`` for the duration of the
    test: every test module assigns ``glb.DEVICE`` at import time, so without
    the pin the value seen here is whatever the last-imported module set.

    The checks use ``assert_called_once_with``. The previous
    ``assert mock.called_once_with(...)`` form only read an auto-created mock
    attribute and therefore could never fail.
    """
    model_name = "katanemo/bge-large-en-v1.5"

    embedding_model = get_embedding_model()

    assert embedding_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # Any non-CUDA device is expected to use the ONNX runtime model.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_ort_model.assert_called_once_with(model_name, file_name="onnx/model.onnx")
|
||||
|
||||
# Test for get_zero_shot_model function
|
||||
@patch.object(glb, "DEVICE", "mps")
@patch("app.loader.ORTModelForSequenceClassification.from_pretrained")
@patch("app.loader.pipeline")
@patch("app.loader.AutoTokenizer.from_pretrained")
def test_get_zero_shot_model(mock_tokenizer, mock_pipeline, mock_ort_model, mock_env):
    """Check which loaders ``get_zero_shot_model`` calls on the MPS device.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.

    Real ``assert_called_once_with`` checks replace the previous
    ``assert mock.called_once_with(...)`` expressions, which were always truthy.
    """
    model_name = "katanemo/bart-large-mnli"

    zero_shot_model = get_zero_shot_model()

    assert zero_shot_model["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name)
    # Any non-CUDA device is expected to use the ONNX runtime model.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_ort_model.assert_called_once_with(model_name, file_name="onnx/model.onnx")
|
||||
|
||||
# Test for get_prompt_guard function
|
||||
@patch.object(glb, "DEVICE", "mps")
@patch("app.loader.AutoTokenizer.from_pretrained")
@patch("app.loader.OVModelForSequenceClassification.from_pretrained")
@patch("app.loader.AutoModelForSequenceClassification.from_pretrained")
def test_get_prompt_guard(mock_auto_model, mock_ov_model, mock_tokenizer):
    """Check which loaders ``get_prompt_guard`` calls on the MPS device.

    Stacked ``@patch`` decorators inject mocks bottom-up, so the parameter
    order is: AutoModelForSequenceClassification, OVModelForSequenceClassification,
    AutoTokenizer. The previous signature had the first two names swapped.

    ``glb.DEVICE`` is pinned for this test because each test module assigns
    it at import time and the last import would otherwise win.
    """
    model_name = arch_guard_model_type[glb.DEVICE]

    prompt_guard = get_prompt_guard(model_name)

    assert prompt_guard["model_name"] == model_name
    mock_tokenizer.assert_called_once_with(model_name, trust_remote_code=True)
    # Off CPU the plain transformers model class is the one expected.
    # NOTE(review): expected arguments are carried over from the original
    # assertions -- confirm against app.loader if this fails.
    mock_auto_model.assert_called_once_with(
        model_name, device_map=glb.DEVICE, low_cpu_mem_usage=True
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue