diff --git a/arch/src/consts.rs b/arch/src/consts.rs
index 4962a75a..572bd2c3 100644
--- a/arch/src/consts.rs
+++ b/arch/src/consts.rs
@@ -11,3 +11,5 @@ pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
+/// Value of the `:path` request header for chat completions calls (leading `/` included).
+pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
diff --git a/arch/src/stream_context.rs b/arch/src/stream_context.rs
index 6d15cc41..e91720b7 100644
--- a/arch/src/stream_context.rs
+++ b/arch/src/stream_context.rs
@@ -1,7 +1,8 @@
 use crate::consts::{
     ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_MESSAGES_KEY, ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER,
-    ARC_FC_CLUSTER, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD,
-    GPT_35_TURBO, MODEL_SERVER_NAME, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
+    ARC_FC_CLUSTER, CHAT_COMPLETIONS_PATH, DEFAULT_EMBEDDING_MODEL, DEFAULT_INTENT_MODEL,
+    DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME,
+    RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
 };
 use crate::filter_context::{embeddings_store, WasmMetrics};
 use crate::llm_providers::LlmProviders;
@@ -919,6 +920,10 @@ impl HttpContext for StreamContext {
         self.delete_content_length_header();
         self.save_ratelimit_header();
 
+        // Exact match on `:path`; a missing header falls back to "" and yields `false`.
+        self.chat_completions_request =
+            self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH;
+
         debug!(
             "S[{}] req_headers={:?}",
             self.context_id,
diff --git a/arch/tests/integration.rs b/arch/tests/integration.rs
index d16c5be7..db0ca962 100644
--- a/arch/tests/integration.rs
+++ b/arch/tests/integration.rs
@@ -53,6 +53,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         .returning(Some("selector-value"))
         .expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders))
         .returning(None)
+        .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
+        .returning(Some("/v1/chat/completions"))
         .expect_log(Some(LogLevel::Debug), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();