change nli model (#167)

* change nli model * Fix bug in hallucination --------- Co-authored-by: Shuguang Chen <54548843+nehcgs@users.noreply.github.com>
2026-07-17 16:31:04 +02:00 · 2024-10-09 19:10:08 -07:00 · 2024-10-09 19:10:08 -07:00 · f9e3a052fc
commit f9e3a052fc
parent 3b7c58698f
3 changed files with 6 additions and 5 deletions
--- a/arch/src/consts.rs
+++ b/arch/src/consts.rs
@ -1,5 +1,5 @@
 pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5";
-pub const DEFAULT_INTENT_MODEL: &str = "katanemo/deberta-base-nli";
+pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli";
 pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8;
 pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.1;
 pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector";
--- a/model_server/app/loader.py
+++ b/model_server/app/loader.py
@ -30,7 +30,7 @@ def get_embedding_model(


 def get_zero_shot_model(
-    model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli"),
+    model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/bart-large-mnli"),
 ):
    print("Loading Zero-shot Model...")

--- a/model_server/app/main.py
+++ b/model_server/app/main.py
@ -189,17 +189,18 @@ async def hallucination(req: HallucinationRequest, res: Response):
    if "arch_messages" in req.parameters:
        req.parameters.pop("arch_messages")

-    candidate_labels = [f"{k} is {v}" for k, v in req.parameters.items()]
+    candidate_labels = {f"{k} is {v}": k for k, v in req.parameters.items()}

    predictions = classifier(
        req.prompt,
-        candidate_labels=candidate_labels,
+        candidate_labels=list(candidate_labels.keys()),
        hypothesis_template="{}",
        multi_label=True,
    )

    params_scores = {
-        k[0]: s for k, s in zip(req.parameters.items(), predictions["scores"])
+        candidate_labels[label]: score
+        for label, score in zip(predictions["labels"], predictions["scores"])
    }

    logger.info(