mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
add health check endpoint for llm gateway (#420)
* add health check endpoint for llm gateway
* fix rust tests
This commit is contained in:
parent
a402fee13b
commit
10cad4d0b7
4 changed files with 20 additions and 4 deletions
|
|
@ -80,9 +80,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
|
||||
start_time = time.time()
|
||||
while True:
|
||||
health_check_status = health_check_endpoint(
|
||||
prompt_gateway_health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{prompt_gateway_port}/healthz"
|
||||
)
|
||||
|
||||
llm_gateway_health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{llm_gateway_port}/healthz"
|
||||
)
|
||||
|
||||
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
current_time = time.time()
|
||||
elapsed_time = current_time - start_time
|
||||
|
|
@ -92,7 +97,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
log.info(f"stopping log monitoring after {log_timeout} seconds.")
|
||||
break
|
||||
|
||||
if health_check_status:
|
||||
if prompt_gateway_health_check_status or llm_gateway_health_check_status:
|
||||
log.info("archgw is running and is healthy!")
|
||||
break
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -135,7 +135,10 @@ impl From<String> for ParameterType {
|
|||
"array" => ParameterType::List,
|
||||
"dict" => ParameterType::Dict,
|
||||
"dictionary" => ParameterType::Dict,
|
||||
_ => ParameterType::String,
|
||||
_ => {
|
||||
log::warn!("Unknown parameter type: {}, assuming type str", s);
|
||||
ParameterType::String
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use common::api::open_ai::{
|
|||
};
|
||||
use common::configuration::LlmProvider;
|
||||
use common::consts::{
|
||||
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
|
||||
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
|
||||
};
|
||||
use common::errors::ServerError;
|
||||
|
|
@ -176,6 +176,12 @@ impl HttpContext for StreamContext {
|
|||
// Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto
|
||||
// the lifecycle of the http request and response.
|
||||
fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
||||
let request_path = self.get_http_request_header(":path").unwrap_or_default();
|
||||
if request_path == HEALTHZ_PATH {
|
||||
self.send_http_response(200, vec![], None);
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
self.select_llm_provider();
|
||||
|
||||
// if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ fn wasm_module() -> String {
|
|||
fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
||||
module
|
||||
.call_proxy_on_request_headers(http_context, 0, false)
|
||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
|
||||
.returning(Some("/v1/chat/completions"))
|
||||
.expect_get_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("x-arch-llm-provider-hint"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue