From 9570b167dbb3d75c2e1fa03d2cf80c8bcef65a54 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Tue, 14 Jan 2025 11:56:10 -0800
Subject: [PATCH] fix tracing

---
 crates/common/src/tokenizer.rs           |  4 ++--
 crates/llm_gateway/src/stream_context.rs | 17 +++++++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs
index aa0870f2..c424e344 100644
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@@ -1,4 +1,4 @@
-use log::debug;
+use log::trace;
 
 #[derive(thiserror::Error, Debug, PartialEq, Eq)]
 #[allow(dead_code)]
@@ -9,7 +9,7 @@ pub enum Error {
 
 #[allow(dead_code)]
 pub fn token_count(model_name: &str, text: &str) -> Result<usize, Error> {
-    debug!("getting token count model={}", model_name);
+    trace!("getting token count model={}", model_name);
     // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?
     let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel {
         model_name: model_name.to_string(),
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index f0b26ffe..bb36816f 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -312,9 +312,11 @@ impl HttpContext for StreamContext {
     }
 
     fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
-        debug!(
+        trace!(
             "on_http_response_body [S={}] bytes={} end_stream={}",
-            self.context_id, body_size, end_of_stream
+            self.context_id,
+            body_size,
+            end_of_stream
         );
 
         if !self.is_chat_completions_request {
@@ -398,9 +400,10 @@ impl HttpContext for StreamContext {
         let body = if self.streaming_response {
             let chunk_start = 0;
             let chunk_size = body_size;
-            debug!(
+            trace!(
                 "streaming response reading, {}..{}",
-                chunk_start, chunk_size
+                chunk_start,
+                chunk_size
             );
             let streaming_chunk = match self.get_http_response_body(0, chunk_size) {
                 Some(chunk) => chunk,
@@ -520,9 +523,11 @@ impl HttpContext for StreamContext {
             }
         }
 
-        debug!(
+        trace!(
             "recv [S={}] total_tokens={} end_stream={}",
-            self.context_id, self.response_tokens, end_of_stream
+            self.context_id,
+            self.response_tokens,
+            end_of_stream
         );
 
         Action::Continue