From 60cadf3e8a0c7c8c597a42b26153831d61e2249b Mon Sep 17 00:00:00 2001 From: cotran Date: Thu, 10 Oct 2024 15:37:08 -0700 Subject: [PATCH] add history of messages --- arch/src/consts.rs | 2 +- arch/src/stream_context.rs | 3 ++- model_server/app/main.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/src/consts.rs b/arch/src/consts.rs index 32172002..f4aae904 100644 --- a/arch/src/consts.rs +++ b/arch/src/consts.rs @@ -1,7 +1,7 @@ pub const DEFAULT_EMBEDDING_MODEL: &str = "katanemo/bge-large-en-v1.5"; pub const DEFAULT_INTENT_MODEL: &str = "katanemo/bart-large-mnli"; pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.8; -pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.1; +pub const DEFAULT_HALLUCINATED_THRESHOLD: f64 = 0.25; pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-arch-ratelimit-selector"; pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; diff --git a/arch/src/stream_context.rs b/arch/src/stream_context.rs index bdf80885..c73728d8 100644 --- a/arch/src/stream_context.rs +++ b/arch/src/stream_context.rs @@ -756,7 +756,8 @@ impl StreamContext { } } } - } else { + } + else { user_messages = callout_context.user_message.as_ref().unwrap().clone(); } info!("user messages: {}", user_messages); diff --git a/model_server/app/main.py b/model_server/app/main.py index 82bbeb50..a80b04c5 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -179,7 +179,7 @@ async def hallucination(req: HallucinationRequest, res: Response): """ Take input as text and return the prediction of hallucination for each parameter """ - + logger.info(f"hallucination request: {req}") if req.model != zero_shot_model["model_name"]: raise HTTPException(status_code=400, detail="unknown model: " + req.model)