diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index b263aed2..b0a6e58c 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -80,9 +80,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): start_time = time.time() while True: - health_check_status = health_check_endpoint( + prompt_gateway_health_check_status = health_check_endpoint( f"http://localhost:{prompt_gateway_port}/healthz" ) + + llm_gateway_health_check_status = health_check_endpoint( + f"http://localhost:{llm_gateway_port}/healthz" + ) + archgw_status = docker_container_status(ARCHGW_DOCKER_NAME) current_time = time.time() elapsed_time = current_time - start_time @@ -92,7 +97,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): log.info(f"stopping log monitoring after {log_timeout} seconds.") break - if health_check_status: + if prompt_gateway_health_check_status or llm_gateway_health_check_status: log.info("archgw is running and is healthy!") break else: diff --git a/crates/common/src/api/open_ai.rs b/crates/common/src/api/open_ai.rs index 8c72a7cd..2a07ce3f 100644 --- a/crates/common/src/api/open_ai.rs +++ b/crates/common/src/api/open_ai.rs @@ -135,7 +135,10 @@ impl From for ParameterType { "array" => ParameterType::List, "dict" => ParameterType::Dict, "dictionary" => ParameterType::Dict, - _ => ParameterType::String, + _ => { + log::warn!("Unknown parameter type: {}, assuming type str", s); + ParameterType::String + }, } } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 69496a61..a3f67dfc 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -5,7 +5,7 @@ use common::api::open_ai::{ }; use common::configuration::LlmProvider; use common::consts::{ - ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, + ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH, RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER, }; use common::errors::ServerError; @@ -176,6 +176,12 @@ impl HttpContext for StreamContext { // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto // the lifecycle of the http request and response. fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { + let request_path = self.get_http_request_header(":path").unwrap_or_default(); + if request_path == HEALTHZ_PATH { + self.send_http_response(200, vec![], None); + return Action::Continue; + } + self.select_llm_provider(); // if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 0b28a175..777d3790 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -18,6 +18,8 @@ fn wasm_module() -> String { fn request_headers_expectations(module: &mut Tester, http_context: i32) { module .call_proxy_on_request_headers(http_context, 0, false) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) + .returning(Some("/v1/chat/completions")) .expect_get_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider-hint"),