diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index aa0870f2..c424e344 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -1,4 +1,4 @@ -use log::debug; +use log::trace; #[derive(thiserror::Error, Debug, PartialEq, Eq)] #[allow(dead_code)] @@ -9,7 +9,7 @@ pub enum Error { #[allow(dead_code)] pub fn token_count(model_name: &str, text: &str) -> Result { - debug!("getting token count model={}", model_name); + trace!("getting token count model={}", model_name); // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton? let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel { model_name: model_name.to_string(), diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 3c15366d..a0714e80 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -406,9 +406,10 @@ impl HttpContext for StreamContext { let body = if self.streaming_response { let chunk_start = 0; let chunk_size = body_size; - debug!( + trace!( "streaming response reading, {}..{}", - chunk_start, chunk_size + chunk_start, + chunk_size ); let streaming_chunk = match self.get_http_response_body(0, chunk_size) { Some(chunk) => chunk, @@ -531,9 +532,11 @@ impl HttpContext for StreamContext { } } - debug!( + trace!( "recv [S={}] total_tokens={} end_stream={}", - self.context_id, self.response_tokens, end_of_stream + self.context_id, + self.response_tokens, + end_of_stream ); Action::Continue