diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 0453afe5..fa9964dd 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -434,14 +434,17 @@ impl StreamContext {
             }
             Ok(streaming_chunk)
         } else {
+            if body_size == 0 {
+                return Err(Action::Continue);
+            }
             debug!(
                 "request_id={}: upstream response complete, streaming=false body_size={}",
                 self.request_identifier(),
                 body_size
             );
-            match self.get_http_response_body(0, usize::MAX) {
-                Some(body) if !body.is_empty() => Ok(body),
-                _ => {
+            match self.get_http_response_body(0, body_size) {
+                Some(body) => Ok(body),
+                None => {
                     warn!(
                         "request_id={}: non streaming response body empty",
                         self.request_identifier()
@@ -1170,14 +1173,7 @@ impl HttpContext for StreamContext {
         }
 
         let current_time = get_current_time().unwrap();
-
-        // Non-streaming upstream responses may arrive in multiple chunks; wait for the
-        // full buffered body before parsing.
-        if !self.streaming_response && !end_of_stream {
-            return Action::Continue;
-        }
-
-        if end_of_stream && body_size == 0 && self.streaming_response {
+        if end_of_stream && body_size == 0 {
             debug!(
                 "request_id={}: response body complete, total_bytes={}",
                 self.request_identifier(),
@@ -1198,20 +1194,15 @@ impl HttpContext for StreamContext {
                 );
 
                 // For error responses, forward the upstream error directly without parsing
-                if let Ok(body) = self.read_raw_response_body(body_size) {
-                    if !body.is_empty() {
+                if body_size > 0 {
+                    if let Ok(body) = self.read_raw_response_body(body_size) {
                         debug!(
                             "request_id={}: upstream error body: {}",
                             self.request_identifier(),
                             String::from_utf8_lossy(&body)
                         );
                         // Forward the error response as-is
-                        let replace_size = if body_size > 0 {
-                            body_size
-                        } else {
-                            body.len()
-                        };
-                        self.set_http_response_body(0, replace_size, &body);
+                        self.set_http_response_body(0, body_size, &body);
                     }
                 }
                 return Action::Continue;
@@ -1241,19 +1232,6 @@ impl HttpContext for StreamContext {
             Err(action) => return action,
         };
 
-        if !self.streaming_response && body.is_empty() {
-            if end_of_stream {
-                self.handle_end_of_request_metrics_and_traces(current_time);
-            }
-            return Action::Continue;
-        }
-
-        let replace_size = if body_size > 0 {
-            body_size
-        } else {
-            body.len()
-        };
-
         debug!(
             "request_id={}: upstream raw response, body_size={} content={}",
             self.request_identifier(),
@@ -1265,14 +1243,14 @@ impl HttpContext for StreamContext {
         if self.streaming_response {
             match self.handle_streaming_response(&body, provider_id) {
                 Ok(serialized_body) => {
-                    self.set_http_response_body(0, replace_size, &serialized_body);
+                    self.set_http_response_body(0, body_size, &serialized_body);
                 }
                 Err(action) => return action,
             }
         } else {
             match self.handle_non_streaming_response(&body, provider_id) {
                 Ok(serialized_body) => {
-                    self.set_http_response_body(0, replace_size, &serialized_body);
+                    self.set_http_response_body(0, body_size, &serialized_body);
                 }
                 Err(action) => return action,
             }
diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh
index 44a43327..09840814 100644
--- a/demos/shared/test_runner/run_demo_tests.sh
+++ b/demos/shared/test_runner/run_demo_tests.sh
@@ -19,8 +19,7 @@ run_hurl_with_retries() {
   local max_attempts=1
   local attempt=1
 
-  if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
-    || [ "$demo_name" = "advanced/currency_exchange" ]; then
+  if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
     max_attempts=3
   fi
 
diff --git a/docs/source/resources/configuration_reference.rst b/docs/source/resources/configuration_reference.rst
index 8e040f75..298f143d 100644
--- a/docs/source/resources/configuration_reference.rst
+++ b/docs/source/resources/configuration_reference.rst
@@ -7,6 +7,29 @@ The following is a complete reference of the ``plano_config.yml`` that controls
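-the Plano gateway. This where you enable capabilities like routing to upstream LLm providers, defining prompt_targets
+the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets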
 where prompts get routed to, apply guardrails, and enable critical agent observability features.
 
+Model provider headers
+----------------------
+
+Each entry under ``model_providers`` (or the legacy ``llm_providers`` alias) may include a ``headers`` map of extra
+HTTP headers that Plano adds to upstream LLM requests. Plano applies these headers after it sets authentication from
+``access_key`` or ``passthrough_auth``, so you can supply provider-specific metadata without replacing the configured
+credentials.
+
+- **Type:** map of strings (header name → value)
+- **Optional:** yes
+- **Common uses:** required ``User-Agent`` values, organization or account identifiers, or other headers some APIs expect
+
+.. code-block:: yaml
+
+    model_providers:
+      - model: moonshotai/kimi-for-coding
+        access_key: $MOONSHOTAI_API_KEY
+        base_url: https://api.kimi.com/coding/v1
+        headers:
+          User-Agent: "KimiCLI/1.3"
+
+The example below includes this and other provider options in context.
+
 .. literalinclude:: includes/plano_config_full_reference.yaml
     :language: yaml
     :linenos: