From 278f2ec0a906703e5f44a9fe8806e1d19bd53f17 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 26 Mar 2025 12:24:38 -0700 Subject: [PATCH] fix rust tests --- crates/common/src/http.rs | 4 +- crates/common/src/ratelimit.rs | 4 +- crates/common/src/tokenizer.rs | 6 +- crates/llm_gateway/tests/integration.rs | 91 ++++++++++++---------- crates/prompt_gateway/tests/integration.rs | 58 +++++++------- 5 files changed, 85 insertions(+), 78 deletions(-) diff --git a/crates/common/src/http.rs b/crates/common/src/http.rs index 85626f9b..8ef176b2 100644 --- a/crates/common/src/http.rs +++ b/crates/common/src/http.rs @@ -3,7 +3,7 @@ use crate::{ stats::{Gauge, IncrementingMetric}, }; use derivative::Derivative; -use log::trace; +use log::debug; use proxy_wasm::traits::Context; use serde::Serialize; use std::{cell::RefCell, collections::HashMap, fmt::Debug, time::Duration}; @@ -48,7 +48,7 @@ pub trait Client: Context { call_args: CallArgs, call_context: Self::CallContext, ) -> Result { - trace!( + debug!( "dispatching http call with args={:?} context={:?}", call_args, call_context diff --git a/crates/common/src/ratelimit.rs b/crates/common/src/ratelimit.rs index 8825fd01..ef4e5d08 100644 --- a/crates/common/src/ratelimit.rs +++ b/crates/common/src/ratelimit.rs @@ -1,7 +1,7 @@ use crate::configuration; use configuration::{Limit, Ratelimit, TimeUnit}; use governor::{DefaultKeyedRateLimiter, InsufficientCapacity, Quota}; -use log::trace; +use log::debug; use std::fmt::Display; use std::num::{NonZero, NonZeroU32}; use std::sync::RwLock; @@ -99,7 +99,7 @@ impl RatelimitMap { selector: Header, tokens_used: NonZeroU32, ) -> Result<(), Error> { - trace!( + debug!( "Checking limit for provider={}, with selector={:?}, consuming tokens={:?}", provider, selector, diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index d2611375..e9431521 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -1,14 +1,14 @@ -use log::trace; +use log::{debug}; #[allow(dead_code)] pub fn token_count(model_name: &str, text: &str) -> Result { - trace!("getting token count model={}", model_name); + debug!("getting token count model={}", model_name); //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 let updated_model = match model_name.starts_with("gpt") { false => { - trace!( + debug!( "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count", model_name ); diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 9d87c00c..0f94d2d8 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -30,7 +30,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { Some("x-arch-llm-provider-hint"), ) .returning(None) - .expect_log(Some(LogLevel::Trace), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4")) + .expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4")) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider"), @@ -223,15 +223,16 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_metric_record("input_sequence_length", 21) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -289,18 +290,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: , model selected: gpt-4")) .expect_send_local_response( Some(StatusCode::BAD_REQUEST.as_u16().into()), None, None, None, ) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 14) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); } @@ -350,17 +352,18 @@ fn llm_gateway_request_ratelimited() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_metric_record("input_sequence_length", 107) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Debug), Some("server error occurred: exceeded limit provider=gpt-4, selector=Header { key: \"selector-key\", value: \"selector-value\" }, tokens_used=107")) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Warn), Some("server error occurred: exceeded limit provider=gpt-4, selector=Header { key: \"selector-key\", value: \"selector-value\" }, tokens_used=107")) .expect_send_local_response( Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), None, @@ -417,16 +420,17 @@ fn llm_gateway_request_not_ratelimited() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -480,14 +484,15 @@ fn llm_gateway_override_model_name() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), Some("provider: \"open-ai-gpt-4\", model requested: o1-mini, model selected: Some(\"gpt-4\")")) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: o1-mini, model selected: gpt-4")) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -537,19 +542,20 @@ fn llm_gateway_override_use_default_model() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log( - Some(LogLevel::Debug), - Some("provider: \"open-ai-gpt-4\", model requested: , model selected: Some(\"gpt-4\")"), + Some(LogLevel::Info), + Some("on_http_request_body: provider: open-ai-gpt-4, model requested: , model selected: gpt-4"), ) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -603,13 +609,14 @@ fn llm_gateway_override_use_model_name_none() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), Some("provider: \"open-ai-gpt-4\", model requested: none, model selected: Some(\"gpt-4\")")) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: none, model selected: gpt-4")) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); diff --git a/crates/prompt_gateway/tests/integration.rs b/crates/prompt_gateway/tests/integration.rs index 1673ac16..bbde10b0 100644 --- a/crates/prompt_gateway/tests/integration.rs +++ b/crates/prompt_gateway/tests/integration.rs @@ -24,12 +24,12 @@ fn wasm_module() -> String { fn request_headers_expectations(module: &mut Tester, http_context: i32) { module .call_proxy_on_request_headers(http_context, 0, false) + .expect_log(Some(LogLevel::Debug), None) .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length")) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) .returning(Some("/v1/chat/completions")) .expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders)) .returning(None) - .expect_log(Some(LogLevel::Trace), None) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id")) .returning(None) .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent")) @@ -69,10 +69,14 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_http_call( Some("arch_internal"), Some(vec![ @@ -88,10 +92,6 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { Some(5000), ) .returning(Some(1)) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_metric_increment("active_http_calls", 1) .execute_and_expect(ReturnType::Action(Action::Pause)) .unwrap(); @@ -233,13 +233,13 @@ fn prompt_gateway_successful_request_to_open_ai_chat_completions() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_http_call(Some("arch_internal"), None, None, None, None) .returning(Some(4)) .expect_metric_increment("active_http_calls", 1) @@ -296,16 +296,16 @@ fn prompt_gateway_bad_request_to_open_ai_chat_completions() { incomplete_chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) .expect_send_local_response( Some(StatusCode::BAD_REQUEST.as_u16().into()), None, None, None, ) - .expect_log(Some(LogLevel::Trace), None) .execute_and_expect(ReturnType::Action(Action::Pause)) .unwrap(); } @@ -378,11 +378,11 @@ fn prompt_gateway_request_to_llm_gateway() { .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .returning(Some(&arch_fc_resp_str)) .expect_log(Some(LogLevel::Warn), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) .expect_http_call( Some("arch_internal"), Some(vec![ @@ -410,14 +410,14 @@ fn prompt_gateway_request_to_llm_gateway() { .expect_metric_increment("active_http_calls", -1) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .returning(Some(&body_text)) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Trace), None) .expect_get_header_map_value(Some(MapType::HttpCallResponseHeaders), Some(":status")) .returning(Some("200")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) + .expect_log(Some(LogLevel::Debug), None) .execute_and_expect(ReturnType::None) .unwrap(); @@ -449,11 +449,11 @@ fn prompt_gateway_request_to_llm_gateway() { ) .expect_get_buffer_bytes(Some(BufferType::HttpResponseBody)) .returning(Some(chat_completion_response_str.as_str())) - .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) .expect_set_buffer_bytes(Some(BufferType::HttpResponseBody), None) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); } @@ -521,15 +521,15 @@ fn prompt_gateway_request_no_intent_match() { .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .returning(Some(&arch_fc_resp_str)) .expect_log(Some(LogLevel::Warn), None) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Debug), Some("intent not matched")) + .expect_log(Some(LogLevel::Info), Some("intent not matched")) .expect_log( - Some(LogLevel::Debug), + Some(LogLevel::Info), Some("no default prompt target found, forwarding request to upstream llm"), ) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Info), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::None) .unwrap(); @@ -677,15 +677,15 @@ fn prompt_gateway_request_no_intent_match_default_target() { .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) .returning(Some(&arch_fc_resp_str)) .expect_log(Some(LogLevel::Warn), None) + .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Trace), None) - .expect_log(Some(LogLevel::Debug), Some("intent not matched")) + .expect_log(Some(LogLevel::Info), Some("intent not matched")) .expect_log( - Some(LogLevel::Debug), + Some(LogLevel::Info), Some("default prompt target found, forwarding request to default prompt target"), ) - .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) .expect_http_call( Some("arch_internal"), Some(vec![