Some fixes on model server (#362)

* Some fixes on model server

* Remove prompt_prefilling message

* Fix logging

* Fix poetry issues

* Improve logging and update the support for text truncation

* Fix tests

* Fix tests

* Fix tests

* Fix modelserver tests

* Update modelserver tests
This commit is contained in:
Shuguang Chen 2025-01-10 16:45:36 -08:00 committed by GitHub
parent ebda682b30
commit 88a02dc478
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 1090 additions and 1666 deletions

View file

@@ -22,9 +22,7 @@ ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)
# Define model names
ARCH_INTENT_MODEL_ALIAS = "Arch-Intent"
ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function"
-logger.info("loading prompt guard model ...")
-arch_guard_model = get_guardrail_handler()
+ARCH_GUARD_MODEL_ALIAS = "katanemo/Arch-Guard"
# Define model handlers
handler_map = {
@@ -34,5 +32,5 @@ handler_map = {
"Arch-Function": ArchFunctionHandler(
ARCH_CLIENT, ARCH_FUNCTION_MODEL_ALIAS, ArchFunctionConfig
),
-"Arch-Guard": arch_guard_model,
+"Arch-Guard": get_guardrail_handler(ARCH_GUARD_MODEL_ALIAS),
}