From 10b5c5b42cf9ca2f5102f01b8c9cf6a16ab664ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Ulises=20Ni=C3=B1o=20Rivera?= Date: Fri, 4 Oct 2024 19:19:44 -0700 Subject: [PATCH] [BUG FIX] Add missing chat completions request hint (#116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Ulises Niño Rivera --- arch/src/consts.rs | 1 + arch/src/stream_context.rs | 8 ++++++-- arch/tests/integration.rs | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/src/consts.rs b/arch/src/consts.rs index 4962a75a..572bd2c3 100644 --- a/arch/src/consts.rs +++ b/arch/src/consts.rs @@ -11,3 +11,4 @@ pub const MODEL_SERVER_NAME: &str = "model_server"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const ARCH_MESSAGES_KEY: &str = "arch_messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; +pub const CHAT_COMPLETIONS_PATH: &str = "v1/chat/completions"; diff --git a/arch/src/stream_context.rs b/arch/src/stream_context.rs index 6d15cc41..e91720b7 100644 --- a/arch/src/stream_context.rs +++ b/arch/src/stream_context.rs @@ -1,7 +1,8 @@ use crate::consts::{ ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_MESSAGES_KEY, ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, - ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, - GPT_35_TURBO, MODEL_SERVER_NAME, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, + ARC_FC_CLUSTER, CHAT_COMPLETIONS_PATH, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, + DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, + RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, }; use crate::filter_context::{embeddings_store, WasmMetrics}; use crate::llm_providers::LlmProviders; @@ -919,6 +920,9 @@ impl HttpContext for StreamContext { self.delete_content_length_header(); self.save_ratelimit_header(); + self.chat_completions_request = + self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH; + debug!( "S[{}] req_headers={:?}", self.context_id, diff --git a/arch/tests/integration.rs b/arch/tests/integration.rs index d16c5be7..db0ca962 100644 --- a/arch/tests/integration.rs +++ b/arch/tests/integration.rs @@ -53,6 +53,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { .returning(Some("selector-value")) .expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders)) .returning(None) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) + .returning(Some("/v1/chat/completions")) .expect_log(Some(LogLevel::Debug), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap();