diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs
index bd9b3217..f2534961 100644
--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@@ -349,6 +349,7 @@ pub struct ChatCompletionsResponse {
     pub choices: Vec<Choice>,
     pub usage: Usage,
     pub system_fingerprint: Option<String>,
+    pub service_tier: Option<String>,
 }
 
 /// Finish reason for completion
@@ -1160,4 +1161,84 @@ mod tests {
         let invalid_result: Result<ToolChoice, _> = serde_json::from_value(json!("invalid"));
         assert!(invalid_result.is_err());
     }
+
+    #[test]
+    fn test_chat_completions_response_with_service_tier() {
+        // An OpenAI payload carrying `service_tier` must parse and keep the tier value.
+        let payload = r#"{
+            "id": "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2",
+            "object": "chat.completion",
+            "created": 1756574706,
+            "model": "gpt-4o-2024-08-06",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response content",
+                    "annotations": []
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 65,
+                "completion_tokens": 184,
+                "total_tokens": 249,
+                "prompt_tokens_details": {
+                    "cached_tokens": 0,
+                    "audio_tokens": 0
+                },
+                "completion_tokens_details": {
+                    "reasoning_tokens": 0,
+                    "audio_tokens": 0,
+                    "accepted_prediction_tokens": 0,
+                    "rejected_prediction_tokens": 0
+                }
+            },
+            "service_tier": "default",
+            "system_fingerprint": "fp_f33640a400"
+        }"#;
+
+        let parsed = serde_json::from_str::<ChatCompletionsResponse>(payload).unwrap();
+
+        assert_eq!(parsed.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2");
+        assert_eq!(parsed.object, "chat.completion");
+        assert_eq!(parsed.created, 1756574706);
+        assert_eq!(parsed.model, "gpt-4o-2024-08-06");
+        assert_eq!(parsed.service_tier.as_deref(), Some("default"));
+        assert_eq!(parsed.system_fingerprint.as_deref(), Some("fp_f33640a400"));
+        assert_eq!(parsed.choices.len(), 1);
+        assert_eq!(parsed.usage.prompt_tokens, 65);
+        assert_eq!(parsed.usage.completion_tokens, 184);
+        assert_eq!(parsed.usage.total_tokens, 249);
+    }
+
+    #[test]
+    fn test_chat_completions_response_without_service_tier() {
+        // Backward compatibility: a payload that omits `service_tier` must still deserialize.
+        let payload = r#"{
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response"
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 20,
+                "total_tokens": 30
+            }
+        }"#;
+
+        let parsed = serde_json::from_str::<ChatCompletionsResponse>(payload).unwrap();
+
+        assert_eq!(parsed.id, "chatcmpl-123");
+        assert!(parsed.service_tier.is_none()); // Key absent from the payload, so expect None
+        assert!(parsed.system_fingerprint.is_none());
+    }
 }
diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs
index 0e8c3705..8170a53d 100644
--- a/crates/hermesllm/src/clients/transformer.rs
+++ b/crates/hermesllm/src/clients/transformer.rs
@@ -211,6 +211,7 @@ impl TryFrom<MessagesResponse> for ChatCompletionsResponse {
             choices: vec![choice],
             usage,
             system_fingerprint: None,
+            service_tier: None,
         })
     }
 }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 6c0977f0..f4dc17c9 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -446,7 +446,6 @@ impl StreamContext {
         } else {
             warn!("No usage information found in response");
         }
-
         // Serialize the normalized response back to JSON bytes
         match serde_json::to_vec(&response) {
             Ok(bytes) => Ok(bytes),
@@ -743,8 +742,7 @@ impl HttpContext for StreamContext {
             if let Some(supported_api) = supported_api_opt {
                 match self.handle_streaming_response(&body, supported_api, provider_id) {
                     Ok(serialized_body) => {
-                        // Pass-through: let the original streaming response continue unchanged
-                        self.set_http_response_body(0, serialized_body.len(), &serialized_body);
+                        self.set_http_response_body(0, body.len(), &serialized_body);
                     }
                     Err(action) => return action,
                 }
@@ -755,7 +753,7 @@ impl HttpContext for StreamContext {
             if let Some(supported_api) = supported_api_opt {
                 match self.handle_non_streaming_response(&body, supported_api, provider_id) {
                     Ok(serialized_body) => {
-                        self.set_http_response_body(0, serialized_body.len(), &serialized_body);
+                        self.set_http_response_body(0, body.len(), &serialized_body);
                     }
                     Err(action) => return action,
                 }