diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index fa9964dd..6920d43d 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -1183,6 +1183,12 @@ impl HttpContext for StreamContext { return Action::Continue; } + // Non-streaming upstream responses may arrive in multiple chunks; wait for the + // full buffered body before parsing (body_size is cumulative on the final chunk). + if !self.streaming_response && !end_of_stream { + return Action::Continue; + } + // Check if this is an error response from upstream if let Some(status_code) = &self.upstream_status_code { if status_code.is_client_error() || status_code.is_server_error() { diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh index 09840814..44a43327 100644 --- a/demos/shared/test_runner/run_demo_tests.sh +++ b/demos/shared/test_runner/run_demo_tests.sh @@ -19,7 +19,8 @@ run_hurl_with_retries() { local max_attempts=1 local attempt=1 - if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then + if [ "$demo_name" = "llm_routing/preference_based_routing" ] \ + || [ "$demo_name" = "advanced/currency_exchange" ]; then max_attempts=3 fi