diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 0453afe5..fa9964dd 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -434,14 +434,17 @@ impl StreamContext { } Ok(streaming_chunk) } else { + if body_size == 0 { + return Err(Action::Continue); + } debug!( "request_id={}: upstream response complete, streaming=false body_size={}", self.request_identifier(), body_size ); - match self.get_http_response_body(0, usize::MAX) { - Some(body) if !body.is_empty() => Ok(body), - _ => { + match self.get_http_response_body(0, body_size) { + Some(body) => Ok(body), + None => { warn!( "request_id={}: non streaming response body empty", self.request_identifier() @@ -1170,14 +1173,7 @@ impl HttpContext for StreamContext { } let current_time = get_current_time().unwrap(); - - // Non-streaming upstream responses may arrive in multiple chunks; wait for the - // full buffered body before parsing. - if !self.streaming_response && !end_of_stream { - return Action::Continue; - } - - if end_of_stream && body_size == 0 && self.streaming_response { + if end_of_stream && body_size == 0 { debug!( "request_id={}: response body complete, total_bytes={}", self.request_identifier(), @@ -1198,20 +1194,15 @@ impl HttpContext for StreamContext { ); // For error responses, forward the upstream error directly without parsing - if let Ok(body) = self.read_raw_response_body(body_size) { - if !body.is_empty() { + if body_size > 0 { + if let Ok(body) = self.read_raw_response_body(body_size) { debug!( "request_id={}: upstream error body: {}", self.request_identifier(), String::from_utf8_lossy(&body) ); // Forward the error response as-is - let replace_size = if body_size > 0 { - body_size - } else { - body.len() - }; - self.set_http_response_body(0, replace_size, &body); + self.set_http_response_body(0, body_size, &body); } } return Action::Continue; @@ -1241,19 +1232,6 @@ impl HttpContext for 
StreamContext { Err(action) => return action, }; - if !self.streaming_response && body.is_empty() { - if end_of_stream { - self.handle_end_of_request_metrics_and_traces(current_time); - } - return Action::Continue; - } - - let replace_size = if body_size > 0 { - body_size - } else { - body.len() - }; - debug!( "request_id={}: upstream raw response, body_size={} content={}", self.request_identifier(), @@ -1265,14 +1243,14 @@ impl HttpContext for StreamContext { if self.streaming_response { match self.handle_streaming_response(&body, provider_id) { Ok(serialized_body) => { - self.set_http_response_body(0, replace_size, &serialized_body); + self.set_http_response_body(0, body_size, &serialized_body); } Err(action) => return action, } } else { match self.handle_non_streaming_response(&body, provider_id) { Ok(serialized_body) => { - self.set_http_response_body(0, replace_size, &serialized_body); + self.set_http_response_body(0, body_size, &serialized_body); } Err(action) => return action, } diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh index 44a43327..09840814 100644 --- a/demos/shared/test_runner/run_demo_tests.sh +++ b/demos/shared/test_runner/run_demo_tests.sh @@ -19,8 +19,7 @@ run_hurl_with_retries() { local max_attempts=1 local attempt=1 - if [ "$demo_name" = "llm_routing/preference_based_routing" ] \ - || [ "$demo_name" = "advanced/currency_exchange" ]; then + if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then max_attempts=3 fi diff --git a/docs/source/resources/configuration_reference.rst b/docs/source/resources/configuration_reference.rst index 8e040f75..298f143d 100644 --- a/docs/source/resources/configuration_reference.rst +++ b/docs/source/resources/configuration_reference.rst @@ -7,6 +7,29 @@ The following is a complete reference of the ``plano_config.yml`` that controls the Plano gateway. 
This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets where prompts get routed to, applying guardrails, and enabling critical agent observability features. +Model provider headers +---------------------- + +Each entry under ``model_providers`` (or the legacy ``llm_providers`` alias) may include a ``headers`` map of extra +HTTP headers that Plano adds to upstream LLM requests. Plano applies these headers after it sets authentication from +``access_key`` or ``passthrough_auth``, so you can supply provider-specific metadata without replacing the configured +credentials. + +- **Type:** map of strings (header name → value) +- **Optional:** yes +- **Common uses:** required ``User-Agent`` values, organization or account identifiers, or other headers some APIs expect + +.. code-block:: yaml + + model_providers: + - model: moonshotai/kimi-for-coding + access_key: $MOONSHOTAI_API_KEY + base_url: https://api.kimi.com/coding/v1 + headers: + User-Agent: "KimiCLI/1.3" + +The example below includes this and other provider options in context. + .. literalinclude:: includes/plano_config_full_reference.yaml :language: yaml :linenos: