From 00c543667d99b68a80b7e304c6be6ac0aaf2058d Mon Sep 17 00:00:00 2001
From: Salman Paracha <salmanparacha@MacBook-Pro-10.local>
Date: Thu, 4 Sep 2025 21:49:54 -0700
Subject: [PATCH] fixed integration tests and cleaned up logs

---
 arch/supervisord.conf                    |   2 +-
 crates/common/src/tokenizer.rs           |   2 +-
 crates/llm_gateway/src/stream_context.rs | 168 +++++++++++++----------
 crates/llm_gateway/tests/integration.rs  | 119 +++++++++-------
 4 files changed, 172 insertions(+), 119 deletions(-)

diff --git a/arch/supervisord.conf b/arch/supervisord.conf
index bec147cc..df25eea9 100644
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0
 stderr_logfile_maxbytes=0
 
 [program:envoy]
-command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log"
+command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log//envoy.log"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs
index ded885d5..9b11f639 100644
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@@ -2,7 +2,7 @@ use log::debug;
 
 #[allow(dead_code)]
 pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
-    debug!("getting token count model={}", model_name);
+    debug!("TOKENIZER: computing token count for model={}", model_name);
     //HACK: add support for tokenizing mistral and other models
     //filed issue https://github.com/katanemo/arch/issues/222
 
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 223e9a90..1465cf97 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -77,6 +77,16 @@ impl StreamContext {
             user_message: None,
         }
     }
+
+    /// Returns the appropriate request identifier for logging.
+    /// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id.
+    fn request_identifier(&self) -> String {
+        self.request_id
+            .as_ref()
+            .filter(|id| !id.is_empty()) // Filter out empty strings
+            .map(|id| id.clone())
+            .unwrap_or_else(|| self.context_id.to_string())
+    }
     fn llm_provider(&self) -> &LlmProvider {
         self.llm_provider
             .as_ref()
@@ -111,7 +121,7 @@ impl StreamContext {
 
         info!(
             "[ARCHGW_REQ_ID:{}] PROVIDER_SELECTION: hint='{}' -> selected='{}'",
-            self.context_id,
+            self.request_identifier(),
             self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
                 .unwrap_or("none".to_string()),
             self.llm_provider.as_ref().unwrap().name
@@ -186,7 +196,9 @@ impl StreamContext {
 
         info!(
             "[ARCHGW_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}",
-            self.context_id, model, token_count
+            self.request_identifier(),
+            model,
+            token_count
         );
 
         // Record the token count to metrics.
@@ -198,7 +210,10 @@ impl StreamContext {
         if let Some(selector) = self.ratelimit_selector.take() {
             info!(
                 "[ARCHGW_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'",
-                self.context_id, model, selector.key, selector.value
+                self.request_identifier(),
+                model,
+                selector.key,
+                selector.value
             );
             ratelimit::ratelimits(None).read().unwrap().check_limit(
                 model.to_owned(),
@@ -208,7 +223,8 @@ impl StreamContext {
         } else {
             debug!(
                 "[ARCHGW_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)",
-                self.context_id, model
+                self.request_identifier(),
+                model
             );
         }
 
@@ -226,7 +242,8 @@ impl StreamContext {
                     let duration_ms = duration.as_millis();
                     info!(
                         "[ARCHGW_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms",
-                        self.context_id, duration_ms
+                        self.request_identifier(),
+                        duration_ms
                     );
                     self.ttft_duration = Some(duration);
                     self.metrics.time_to_first_token.record(duration_ms as u64);
@@ -234,7 +251,8 @@ impl StreamContext {
                 Err(e) => {
                     warn!(
                         "[ARCHGW_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}",
-                        self.context_id, e
+                        self.request_identifier(),
+                        e
                     );
                 }
             }
@@ -249,7 +267,9 @@ impl StreamContext {
                 let duration_ms = duration.as_millis();
                 info!(
                     "[ARCHGW_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}",
-                    self.context_id, duration_ms, self.response_tokens
+                    self.request_identifier(),
+                    duration_ms,
+                    self.response_tokens
                 );
                 // Record the latency to the latency histogram
                 self.metrics.request_latency.record(duration_ms as u64);
@@ -263,7 +283,7 @@ impl StreamContext {
 
                     info!(
                         "[ARCHGW_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}",
-                        self.context_id,
+                        self.request_identifier(),
                         tpot,
                         1000 / tpot
                     );
@@ -322,14 +342,16 @@ impl StreamContext {
             let chunk_size = body_size;
             debug!(
                 "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}",
-                self.context_id, chunk_size
+                self.request_identifier(),
+                chunk_size
             );
             let streaming_chunk = match self.get_http_response_body(0, chunk_size) {
                 Some(chunk) => chunk,
                 None => {
                     warn!(
                         "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}",
-                        self.context_id, chunk_size
+                        self.request_identifier(),
+                        chunk_size
                     );
                     return Err(Action::Continue);
                 }
@@ -338,7 +360,7 @@ impl StreamContext {
             if streaming_chunk.len() != chunk_size {
                 warn!(
                     "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}",
-                    self.context_id,
+                    self.request_identifier(),
                     chunk_size,
                     streaming_chunk.len()
                 );
@@ -350,7 +372,8 @@ impl StreamContext {
             }
             debug!(
                 "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}",
-                self.context_id, body_size
+                self.request_identifier(),
+                body_size
             );
             match self.get_http_response_body(0, body_size) {
                 Some(body) => Ok(body),
@@ -365,7 +388,7 @@ impl StreamContext {
     fn debug_log_body(&self, body: &[u8]) {
         debug!(
             "[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
-            self.context_id,
+            self.request_identifier(),
             body.len(),
             String::from_utf8_lossy(body)
         );
@@ -378,7 +401,7 @@ impl StreamContext {
     ) -> Result<Vec<u8>, Action> {
         debug!(
             "[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}",
-            self.context_id,
+            self.request_identifier(),
             provider_id,
             body.len()
         );
@@ -420,7 +443,8 @@ impl StreamContext {
                                 if provider_response.is_final() {
                                     debug!(
                                         "[ARCHGW_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}",
-                                        self.context_id, self.response_tokens
+                                        self.request_identifier(),
+                                        self.response_tokens
                                     );
                                 }
 
@@ -429,7 +453,7 @@ impl StreamContext {
                                     self.response_tokens += estimated_tokens.max(1);
                                     debug!(
                                         "[ARCHGW_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
-                                        self.context_id,
+                                        self.request_identifier(),
                                         content.len(),
                                         estimated_tokens.max(1),
                                         self.response_tokens
@@ -439,7 +463,8 @@ impl StreamContext {
                             Err(e) => {
                                 warn!(
                                     "[ARCHGW_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}",
-                                    self.context_id, e
+                                    self.request_identifier(),
+                                    e
                                 );
                                 return Err(Action::Continue);
                             }
@@ -467,7 +492,7 @@ impl StreamContext {
     ) -> Result<Vec<u8>, Action> {
         info!(
             "[ARCHGW_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}",
-            self.context_id,
+            self.request_identifier(),
             provider_id,
             body.len()
         );
@@ -479,7 +504,7 @@ impl StreamContext {
                     Err(e) => {
                         warn!(
                             "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}",
-                            self.context_id,
+                            self.request_identifier(),
                             e,
                             String::from_utf8_lossy(body)
                         );
@@ -494,7 +519,7 @@ impl StreamContext {
             None => {
                 warn!(
                     "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api",
-                    self.context_id
+                    self.request_identifier()
                 );
                 return Err(Action::Continue);
             }
@@ -506,7 +531,7 @@ impl StreamContext {
         {
             info!(
                 "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}",
-                self.context_id,
+                self.request_identifier(),
                 prompt_tokens,
                 completion_tokens,
                 total_tokens
@@ -515,7 +540,7 @@ impl StreamContext {
         } else {
             warn!(
                 "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: no usage information found",
-                self.context_id
+                self.request_identifier()
             );
         }
         // Serialize the normalized response back to JSON bytes
@@ -523,7 +548,7 @@ impl StreamContext {
             Ok(bytes) => {
                 debug!(
                     "[ARCHGW_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}",
-                    self.context_id,
+                    self.request_identifier(),
                     String::from_utf8_lossy(&bytes)
                 );
                 Ok(bytes)
@@ -628,7 +653,9 @@ impl HttpContext for StreamContext {
     fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         debug!(
             "[ARCHGW_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}",
-            self.context_id, body_size, end_of_stream
+            self.request_identifier(),
+            body_size,
+            end_of_stream
         );
 
         // Let the client send the gateway all the data before sending to the LLM_provider.
@@ -665,14 +692,14 @@ impl HttpContext for StreamContext {
             Some(the_client_api) => {
                 info!(
                     "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}",
-                    self.context_id,
+                    self.request_identifier(),
                     the_client_api,
                     body_bytes.len()
                 );
 
                 debug!(
                     "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}",
-                    self.context_id,
+                    self.request_identifier(),
                     String::from_utf8_lossy(&body_bytes)
                 );
 
@@ -681,7 +708,7 @@ impl HttpContext for StreamContext {
                     Err(e) => {
                         warn!(
                             "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}",
-                            self.context_id,
+                            self.request_identifier(),
                             e,
                             String::from_utf8_lossy(&body_bytes)
                         );
@@ -724,7 +751,7 @@ impl HttpContext for StreamContext {
                 } else {
                     warn!(
                         "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
-                        self.context_id,
+                        self.request_identifier(),
                         model_requested,
                         self.llm_provider().name,
                         self.llm_provider().model
@@ -753,7 +780,7 @@ impl HttpContext for StreamContext {
 
         info!(
             "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
-            self.context_id,
+            self.request_identifier(),
             model_requested,
             resolved_model,
             self.llm_provider().name,
@@ -776,55 +803,56 @@ impl HttpContext for StreamContext {
         }
 
         // Convert chat completion request to llm provider specific request using provider interface
-        let serialized_body_bytes_upstream = match self.resolved_api.as_ref() {
-            Some(upstream) => {
-                info!(
+        let serialized_body_bytes_upstream =
+            match self.resolved_api.as_ref() {
+                Some(upstream) => {
+                    info!(
                     "[ARCHGW_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}",
-                    self.context_id, self.client_api, upstream
+                    self.request_identifier(), self.client_api, upstream
                 );
 
-                match ProviderRequestType::try_from((&deserialized_client_request, upstream)) {
-                    Ok(request) => {
-                        debug!(
-                            "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
-                            self.context_id,
-                            String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
-                        );
+                    match ProviderRequestType::try_from((&deserialized_client_request, upstream)) {
+                        Ok(request) => {
+                            debug!(
+                                "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
+                                self.request_identifier(),
+                                String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
+                            );
 
-                        match request.to_bytes() {
-                            Ok(bytes) => bytes,
-                            Err(e) => {
-                                warn!("Failed to serialize request body: {}", e);
-                                self.send_server_error(
-                                    ServerError::LogicError(format!(
-                                        "Request serialization error: {}",
-                                        e
-                                    )),
-                                    Some(StatusCode::BAD_REQUEST),
-                                );
-                                return Action::Pause;
+                            match request.to_bytes() {
+                                Ok(bytes) => bytes,
+                                Err(e) => {
+                                    warn!("Failed to serialize request body: {}", e);
+                                    self.send_server_error(
+                                        ServerError::LogicError(format!(
+                                            "Request serialization error: {}",
+                                            e
+                                        )),
+                                        Some(StatusCode::BAD_REQUEST),
+                                    );
+                                    return Action::Pause;
+                                }
                             }
                         }
-                    }
-                    Err(e) => {
-                        warn!("Failed to create provider request: {}", e);
-                        self.send_server_error(
-                            ServerError::LogicError(format!("Provider request error: {}", e)),
-                            Some(StatusCode::BAD_REQUEST),
-                        );
-                        return Action::Pause;
+                        Err(e) => {
+                            warn!("Failed to create provider request: {}", e);
+                            self.send_server_error(
+                                ServerError::LogicError(format!("Provider request error: {}", e)),
+                                Some(StatusCode::BAD_REQUEST),
+                            );
+                            return Action::Pause;
+                        }
                     }
                 }
-            }
-            None => {
-                warn!("No upstream API resolved");
-                self.send_server_error(
-                    ServerError::LogicError("No upstream API resolved".into()),
-                    Some(StatusCode::BAD_REQUEST),
-                );
-                return Action::Pause;
-            }
-        };
+                None => {
+                    warn!("No upstream API resolved");
+                    self.send_server_error(
+                        ServerError::LogicError("No upstream API resolved".into()),
+                        Some(StatusCode::BAD_REQUEST),
+                    );
+                    return Action::Pause;
+                }
+            };
 
         self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream);
         Action::Continue
diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs
index 82ae8322..24301111 100644
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@@ -31,8 +31,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         )
         .returning(None)
         .expect_log(
-            Some(LogLevel::Debug),
-            Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
+            Some(LogLevel::Info),
+            None, // Dynamic request ID - could be context_id or x-request-id
         )
         .expect_add_header_map_value(
             Some(MapType::HttpRequestHeaders),
@@ -193,10 +193,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
 
     module
         .call_proxy_on_context_create(http_context, filter_context)
-        .expect_log(
-            Some(LogLevel::Trace),
-            Some("||| create_http_context called with context_id: 2 |||"),
-        )
+        .expect_log(Some(LogLevel::Trace), None)
         .execute_and_expect(ReturnType::None)
         .unwrap();
 
@@ -211,15 +208,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 21)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -263,15 +264,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
             incomplete_chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(incomplete_chat_completions_request_body))
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 13)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -322,16 +327,18 @@ fn llm_gateway_request_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None)// Dynamic request ID)
         .expect_metric_record("input_sequence_length", 107)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107"))
         .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
         .expect_send_local_response(
             Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
@@ -376,16 +383,21 @@ fn llm_gateway_request_not_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
         .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+         // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -423,16 +435,20 @@ fn llm_gateway_override_model_name() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -470,19 +486,23 @@ fn llm_gateway_override_use_default_model() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
         .expect_log(
             Some(LogLevel::Info),
-            Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
+            None // Dynamic request ID,
         )
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -520,16 +540,21 @@ fn llm_gateway_override_use_model_name_none() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None)
+         // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();