diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs
index 4e44a9ff..56af01b5 100644
--- a/crates/llm_gateway/src/filter_context.rs
+++ b/crates/llm_gateway/src/filter_context.rs
@@ -9,7 +9,7 @@ use common::llm_providers::LlmProviders;
 use common::ratelimit;
 use common::stats::Gauge;
 use common::tracing::TraceData;
-use log::debug;
+use log::trace;
 use log::warn;
 use proxy_wasm::traits::*;
 use proxy_wasm::types::*;
@@ -103,10 +103,8 @@ impl RootContext for FilterContext {
     fn on_tick(&mut self) {
         let _ = self.traces_queue.try_lock().map(|mut traces_queue| {
             while let Some(trace) = traces_queue.pop_front() {
-                debug!("trace received: {:?}", trace);
-
                 let trace_str = serde_json::to_string(&trace).unwrap();
-                debug!("trace: {}", trace_str);
+                trace!("trace details: {}", trace_str);
                 let call_args = CallArgs::new(
                     OTEL_COLLECTOR_HTTP,
                     OTEL_POST_PATH,
@@ -139,7 +137,7 @@ impl Context for FilterContext {
         _body_size: usize,
         _num_trailers: usize,
     ) {
-        debug!(
+        trace!(
             "||| on_http_call_response called with token_id: {:?} |||",
             token_id
         );
@@ -151,7 +149,7 @@ impl Context for FilterContext {
             .expect("invalid token_id");
 
         if let Some(status) = self.get_http_call_response_header(":status") {
-            debug!("trace response status: {:?}", status);
+            trace!("trace response status: {:?}", status);
         };
     }
 }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 817bcefb..a0714e80 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -153,7 +153,7 @@ impl StreamContext {
         self.metrics
             .input_sequence_length
             .record(token_count as u64);
-        log::debug!("Recorded input token count: {}", token_count);
+        trace!("Recorded input token count: {}", token_count);
 
         // Check if rate limiting needs to be applied.
         if let Some(selector) = self.ratelimit_selector.take() {
@@ -164,7 +164,7 @@ impl StreamContext {
                 NonZero::new(token_count as u32).unwrap(),
             )?;
         } else {
-            log::debug!("No rate limit applied for model: {}", model);
+            trace!("No rate limit applied for model: {}", model);
         }
 
         Ok(())
@@ -331,7 +331,7 @@ impl HttpContext for StreamContext {
                 Ok(duration) => {
                     // Convert the duration to milliseconds
                     let duration_ms = duration.as_millis();
-                    debug!("Total latency: {} milliseconds", duration_ms);
+                    debug!("request latency: {}ms", duration_ms);
                     // Record the latency to the latency histogram
                     self.metrics.request_latency.record(duration_ms as u64);
 
@@ -339,11 +339,14 @@ impl HttpContext for StreamContext {
                         // Compute the time per output token
                         let tpot = duration_ms as u64 / self.response_tokens as u64;
 
-                        debug!("Time per output token: {} milliseconds", tpot);
                         // Record the time per output token
                         self.metrics.time_per_output_token.record(tpot);
 
-                        debug!("Tokens per second: {}", 1000 / tpot);
+                        trace!(
+                            "time per token: {}ms, tokens per second: {}",
+                            tpot,
+                            1000 / tpot
+                        );
                         // Record the tokens per second
                         self.metrics.tokens_per_second.record(1000 / tpot);
                     }
@@ -500,7 +503,7 @@ impl HttpContext for StreamContext {
                 match current_time.duration_since(self.start_time) {
                     Ok(duration) => {
                         let duration_ms = duration.as_millis();
-                        debug!("Time to First Token (TTFT): {} milliseconds", duration_ms);
+                        debug!("time to first token: {}ms", duration_ms);
                         self.ttft_duration = Some(duration);
                         self.metrics.time_to_first_token.record(duration_ms as u64);
                     }
diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index 39d144f1..fc35877f 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -14,7 +14,7 @@ use common::http::{CallArgs, Client};
 use common::stats::Gauge;
 use derivative::Derivative;
 use http::StatusCode;
-use log::{debug, info, warn};
+use log::{debug, warn};
 use proxy_wasm::traits::*;
 use serde_yaml::Value;
 use std::cell::RefCell;
@@ -263,7 +263,7 @@ impl StreamContext {
             );
         }
 
-        // update prompt target name from the tool call
+        // update prompt target name from the tool call response
         callout_context.prompt_target_name =
             Some(self.tool_calls.as_ref().unwrap()[0].function.name.clone());
 
@@ -364,7 +364,6 @@ impl StreamContext {
         let http_status = self
             .get_http_call_response_header(":status")
             .unwrap_or(StatusCode::OK.as_str().to_string());
-        debug!("api_call_response_handler: http_status: {}", http_status);
         if http_status != StatusCode::OK.as_str() {
             warn!(
                 "api server responded with non 2xx status code: {}",
@@ -446,22 +445,20 @@ impl StreamContext {
     fn get_system_prompt(&self, prompt_target: Option<PromptTarget>) -> Option<String> {
         match prompt_target {
             None => self.system_prompt.as_ref().clone(),
-            Some(prompt_target) => prompt_target.system_prompt,
+            // a target without its own system prompt falls back to `self.system_prompt`
+            Some(prompt_target) => prompt_target
+                .system_prompt
+                .or_else(|| self.system_prompt.as_ref().clone()),
         }
     }
 
-    fn filter_out_arch_messages(&self, messages: &Vec<Message>) -> Vec<Message> {
+    fn filter_out_arch_messages(&self, messages: &[Message]) -> Vec<Message> {
         messages
-            .into_iter()
+            .iter()
             .filter(|m| {
-                if m.role == TOOL_ROLE
+                !(m.role == TOOL_ROLE
                     || m.content.is_none()
-                    || (m.tool_calls.is_some() && !m.tool_calls.as_ref().unwrap().is_empty())
-                {
-                    true
-                } else {
-                    false
-                }
+                    || (m.tool_calls.is_some() && !m.tool_calls.as_ref().unwrap().is_empty()))
             })
             .cloned()
             .collect()
@@ -470,7 +467,6 @@ impl StreamContext {
     fn construct_llm_messages(&mut self, callout_context: &StreamCallContext) -> Vec<Message> {
         let mut messages: Vec<Message> = Vec::new();
 
-        info!("prompt target: {:?}", callout_context.prompt_target_name);
         // add system prompt
         let system_prompt = match callout_context.prompt_target_name.as_ref() {
             None => self.system_prompt.as_ref().clone(),
@@ -479,8 +475,6 @@ impl StreamContext {
             }
         };
 
-        info!("system_prompt: {:?}", system_prompt);
-
         if system_prompt.is_some() {
             let system_prompt_message = Message {
                 role: SYSTEM_ROLE.to_string(),