Some fixes on model server (#362)

* Some fixes on model server
* Remove prompt_prefilling message
* Fix logging
* Fix poetry issues
* Improve logging and update the support for text truncation
* Fix tests
* Fix tests
* Fix tests
* Fix modelserver tests
* Update modelserver tests
2026-05-21 13:55:15 +02:00 · 2025-01-10 16:45:36 -08:00 · 2025-01-10 16:45:36 -08:00 · 88a02dc478
commit 88a02dc478
parent ebda682b30
25 changed files with 1090 additions and 1666 deletions
--- a/model_server/src/commons/globals.py
+++ b/model_server/src/commons/globals.py
@@ -22,9 +22,7 @@ ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)
 # Define model names
 ARCH_INTENT_MODEL_ALIAS = "Arch-Intent"
 ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function"
-
-logger.info("loading prompt guard model ...")
-arch_guard_model = get_guardrail_handler()
+ARCH_GUARD_MODEL_ALIAS = "katanemo/Arch-Guard"

 # Define model handlers
 handler_map = {
@@ -34,5 +32,5 @@ handler_map = {
    "Arch-Function": ArchFunctionHandler(
        ARCH_CLIENT, ARCH_FUNCTION_MODEL_ALIAS, ArchFunctionConfig
    ),
-    "Arch-Guard": arch_guard_model,
+    "Arch-Guard": get_guardrail_handler(ARCH_GUARD_MODEL_ALIAS),
 }