add health check endpoint for llm gateway (#420)

* add health check endpoint for llm gateway
* fix rust tests
2026-06-08 14:55:14 +02:00 · 2025-03-03 13:11:57 -08:00 · 2025-03-03 13:11:57 -08:00 · 10cad4d0b7
commit 10cad4d0b7
parent a402fee13b
4 changed files with 20 additions and 4 deletions
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -80,9 +80,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):

        start_time = time.time()
        while True:
-            health_check_status = health_check_endpoint(
+            prompt_gateway_health_check_status = health_check_endpoint(
                f"http://localhost:{prompt_gateway_port}/healthz"
            )
+
+            llm_gateway_health_check_status = health_check_endpoint(
+                f"http://localhost:{llm_gateway_port}/healthz"
+            )
+
            archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
            current_time = time.time()
            elapsed_time = current_time - start_time
@ -92,7 +97,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
                log.info(f"stopping log monitoring after {log_timeout} seconds.")
                break

-            if health_check_status:
+            if prompt_gateway_health_check_status or llm_gateway_health_check_status:
                log.info("archgw is running and is healthy!")
                break
            else:
--- a/crates/common/src/api/open_ai.rs
+++ b/crates/common/src/api/open_ai.rs
@ -135,7 +135,10 @@ impl From<String> for ParameterType {
            "array" => ParameterType::List,
            "dict" => ParameterType::Dict,
            "dictionary" => ParameterType::Dict,
-            _ => ParameterType::String,
+            _ => {
+                log::warn!("Unknown parameter type: {}, assuming type str", s);
+                ParameterType::String
+            },
        }
    }
 }
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -5,7 +5,7 @@ use common::api::open_ai::{
 };
 use common::configuration::LlmProvider;
 use common::consts::{
-    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
+    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
    RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::errors::ServerError;
@ -176,6 +176,12 @@ impl HttpContext for StreamContext {
    // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto
    // the lifecycle of the http request and response.
    fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
+        let request_path = self.get_http_request_header(":path").unwrap_or_default();
+        if request_path == HEALTHZ_PATH {
+            self.send_http_response(200, vec![], None);
+            return Action::Continue;
+        }
+
        self.select_llm_provider();

        // if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@ -18,6 +18,8 @@ fn wasm_module() -> String {
 fn request_headers_expectations(module: &mut Tester, http_context: i32) {
    module
        .call_proxy_on_request_headers(http_context, 0, false)
+        .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
+        .returning(Some("/v1/chat/completions"))
        .expect_get_header_map_value(
            Some(MapType::HttpRequestHeaders),
            Some("x-arch-llm-provider-hint"),