fix llm gateway rust tests

2026-06-17 15:25:17 +02:00 · 2025-06-09 16:32:42 -07:00 · 2025-06-09 16:32:42 -07:00 · 3f9eae4aa3
commit 3f9eae4aa3
parent ea2dd85bf1
2 changed files with 21 additions and 99 deletions
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -196,20 +196,22 @@ impl HttpContext for StreamContext {
    // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto
    // the lifecycle of the http request and response.
    fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
+        // debug!("headers: {:?}", self.get_http_request_headers());
        let request_path = self.get_http_request_header(":path").unwrap_or_default();
        if request_path == HEALTHZ_PATH {
            self.send_http_response(200, vec![], None);
            return Action::Continue;
        }

-        let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
-
        let use_agent_orchestrator = match self.overrides.as_ref() {
            Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
            None => false,
        };

-        if let Some(routing_header_value) = routing_header_value.as_ref() {
+        let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
+
+        if routing_header_value.is_some() && !routing_header_value.as_ref().unwrap().is_empty() {
+            let routing_header_value = routing_header_value.as_ref().unwrap();
            info!("routing header already set: {}", routing_header_value);
            self.llm_provider = Some(Rc::new(LlmProvider {
                name: routing_header_value.to_string(),
@ -386,6 +388,8 @@ impl HttpContext for StreamContext {
            }
        };

+        // trace!("on_http_request_body: update request body to: {}, len: {}", String::from_utf8_lossy(&deserialized_body_bytes), deserialized_body_bytes.len());
+
        self.set_http_request_body(0, body_size, &deserialized_body_bytes);

        Action::Continue
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@ -202,20 +202,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
    request_headers_expectations(&mut module, http_context);

    // Request Body
-    let chat_completions_request_body = "\
-    {\
-        \"messages\": [\
-        {\
-            \"role\": \"system\",\
-            \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
-        },\
-        {\
-            \"role\": \"user\",\
-            \"content\": \"Compose a poem.\"\
-        }\
-        ],\
-        \"model\": \"gpt-4\"\
-    }";
+    let chat_completions_request_body = r#"{"model":"gpt-4","messages":[{"role":"system","content":"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},{"role":"user","content":"Compose a poem."}]}"#;

    module
        .call_proxy_on_request_body(
@ -229,7 +216,6 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 21)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
@ -268,18 +254,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
    request_headers_expectations(&mut module, http_context);

    // Request Body
-    let incomplete_chat_completions_request_body = "\
-    {\
-        \"messages\": [\
-        {\
-            \"role\": \"system\"\
-        },\
-        {\
-            \"role\": \"user\",\
-            \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
-        }\
-        ]\
-    }";
+    let incomplete_chat_completions_request_body = r#"{"model":"gpt-1","messages":[{"role":"system","content":"Compose a poem that explains the concept of recursion in programming."}]}"#;

    module
        .call_proxy_on_request_body(
@ -290,7 +265,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: , model selected: gpt-4"))
+        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
        .expect_send_local_response(
            Some(StatusCode::BAD_REQUEST.as_u16().into()),
            None,
@ -300,8 +275,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_metric_record("input_sequence_length", 14)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_metric_record("input_sequence_length", 13)
        .expect_log(Some(LogLevel::Debug), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@ -359,11 +333,10 @@ fn llm_gateway_request_ratelimited() {
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 107)
+        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Warn), Some("server error occurred: exceeded limit provider=gpt-4, selector=Header { key: \"selector-key\", value: \"selector-value\" }, tokens_used=107"))
+        .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
        .expect_send_local_response(
            Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
            None,
@ -399,20 +372,7 @@ fn llm_gateway_request_not_ratelimited() {
    normal_flow(&mut module, filter_context, http_context);

    // give shorter body to avoid rate limiting
-    let chat_completions_request_body = "\
-{\
-    \"messages\": [\
-    {\
-        \"role\": \"system\",\
-        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
-    },\
-    {\
-        \"role\": \"user\",\
-        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
-    }\
-    ],\
-    \"model\": \"gpt-4\"\
-}";
+    let chat_completions_request_body = r#"{"model":"gpt-1","messages":[{"role":"system","content":"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},{"role":"user","content":"Compose a poem that explains the concept of recursion in programming."}]}"#;

    module
        .call_proxy_on_request_body(
@ -427,7 +387,6 @@ fn llm_gateway_request_not_ratelimited() {
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
@ -460,20 +419,7 @@ fn llm_gateway_override_model_name() {
    normal_flow(&mut module, filter_context, http_context);

    // give shorter body to avoid rate limiting
-    let chat_completions_request_body = "\
-{\
-    \"model\": \"o1-mini\",\
-    \"messages\": [\
-    {\
-        \"role\": \"system\",\
-        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
-    },\
-    {\
-        \"role\": \"user\",\
-        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
-    }\
-    ]
-}";
+    let chat_completions_request_body = r#"{"model":"gpt-1","messages":[{"role":"system","content":"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},{"role":"user","content":"Compose a poem that explains the concept of recursion in programming."}]}"#;

    module
        .call_proxy_on_request_body(
@ -485,8 +431,7 @@ fn llm_gateway_override_model_name() {
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: o1-mini, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 29)
@ -521,19 +466,7 @@ fn llm_gateway_override_use_default_model() {
    normal_flow(&mut module, filter_context, http_context);

    // give shorter body to avoid rate limiting
-    let chat_completions_request_body = "\
-{\
-    \"messages\": [\
-    {\
-        \"role\": \"system\",\
-        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
-    },\
-    {\
-        \"role\": \"user\",\
-        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
-    }\
-    ]
-}";
+    let chat_completions_request_body = r#"{"model":"gpt-1","messages":[{"role":"system","content":"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},{"role":"user","content":"Compose a poem that explains the concept of recursion in programming."}]}"#;

    module
        .call_proxy_on_request_body(
@ -547,14 +480,13 @@ fn llm_gateway_override_use_default_model() {
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(
            Some(LogLevel::Info),
-            Some("on_http_request_body: provider: open-ai-gpt-4, model requested: , model selected: gpt-4"),
+            Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"),
        )
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
+        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@ -584,20 +516,7 @@ fn llm_gateway_override_use_model_name_none() {
    normal_flow(&mut module, filter_context, http_context);

    // give shorter body to avoid rate limiting
-    let chat_completions_request_body = "\
-{\
-    \"model\": \"none\",\
-    \"messages\": [\
-    {\
-        \"role\": \"system\",\
-        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
-    },\
-    {\
-        \"role\": \"user\",\
-        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
-    }\
-    ]
-}";
+    let chat_completions_request_body = r#"{"model":"none","messages":[{"role":"system","content":"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},{"role":"user","content":"Compose a poem that explains the concept of recursion in programming."}]}"#;

    module
        .call_proxy_on_request_body(
@ -615,7 +534,6 @@ fn llm_gateway_override_use_model_name_none() {
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();