mirror of
https://github.com/katanemo/plano.git
synced 2026-04-26 17:26:26 +02:00
Some fixes on model server (#362)
* Some fixes on model server
* Remove prompt_prefilling message
* Fix logging
* Fix poetry issues
* Improve logging and update the support for text truncation
* Fix tests
* Fix tests
* Fix tests
* Fix modelserver tests
* Update modelserver tests
This commit is contained in:
parent
ebda682b30
commit
88a02dc478
25 changed files with 1090 additions and 1666 deletions
|
|
@ -1,12 +1,8 @@
|
|||
import os
|
||||
import pytest
|
||||
|
||||
from src.commons.globals import handler_map
|
||||
from src.core.model_utils import ChatMessage, Message
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from src.main import app
|
||||
from src.commons.globals import handler_map
|
||||
from src.core.utils.model_utils import ChatMessage, Message
|
||||
|
||||
|
||||
# define function
|
||||
get_weather_api = {
|
||||
|
|
@ -163,7 +159,10 @@ async def test_function_calling(get_data_func):
|
|||
function_calling_response = await handler_map["Arch-Function"].chat_completion(
|
||||
req
|
||||
)
|
||||
assert handler_map["Arch-Function"].hallu_handler.hallucination == hallucination
|
||||
assert (
|
||||
handler_map["Arch-Function"].hallucination_state.hallucination
|
||||
== hallucination
|
||||
)
|
||||
response_txt = function_calling_response.choices[0].message.content
|
||||
|
||||
if parameter_gathering:
|
||||
|
|
|
|||
|
|
@ -1,33 +1,6 @@
|
|||
from unittest.mock import patch, MagicMock
|
||||
from src.core.guardrails import get_guardrail_handler
|
||||
|
||||
# Mock constants
|
||||
arch_guard_model_type = {
|
||||
"cpu": "katanemo/Arch-Guard-cpu",
|
||||
"cuda": "katanemo/Arch-Guard",
|
||||
"mps": "katanemo/Arch-Guard",
|
||||
}
|
||||
|
||||
|
||||
# [TODO] Review: check the following code to test under `cpu`, `cuda`, and `mps`
|
||||
# Test for `get_guardrail_handler()` function on `cpu`
|
||||
@patch("src.core.guardrails.AutoTokenizer.from_pretrained")
|
||||
@patch("src.core.guardrails.AutoModelForSequenceClassification.from_pretrained")
|
||||
def test_guardrail_handler_on_cpu(mock_auto_model, mock_tokenizer):
|
||||
device = "cpu"
|
||||
|
||||
mock_tokenizer.return_value = MagicMock()
|
||||
|
||||
guardrail = get_guardrail_handler(device=device)
|
||||
|
||||
mock_tokenizer.assert_called_once_with(guardrail.model_name, trust_remote_code=True)
|
||||
|
||||
mock_auto_model.assert_called_once_with(
|
||||
guardrail.model_name,
|
||||
device_map=device,
|
||||
low_cpu_mem_usage=True,
|
||||
)
|
||||
|
||||
|
||||
# Test for `get_guardrail_handler()` function on `cuda`
|
||||
@patch("src.core.guardrails.AutoTokenizer.from_pretrained")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue