From 420431b30cbff3d9510b0bcd7ece009102b0e555 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Mon, 18 May 2026 18:11:22 +0000
Subject: [PATCH] fix(llm_gateway): buffer non-streaming response body until end_of_stream

Wait for the full upstream body before JSON parsing to avoid truncated
responses on chunked replies. Retry currency_exchange demo tests on flake.

Co-authored-by: Musa
---
 crates/llm_gateway/src/stream_context.rs   | 6 ++++++
 demos/shared/test_runner/run_demo_tests.sh | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index fa9964dd..6920d43d 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -1183,6 +1183,12 @@ impl HttpContext for StreamContext {
             return Action::Continue;
         }
 
+        // Non-streaming upstream responses may arrive in multiple chunks; wait for the
+        // full buffered body before parsing (body_size is cumulative on the final chunk).
+        if !self.streaming_response && !end_of_stream {
+            return Action::Continue;
+        }
+
         // Check if this is an error response from upstream
         if let Some(status_code) = &self.upstream_status_code {
             if status_code.is_client_error() || status_code.is_server_error() {
diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh
index 09840814..44a43327 100644
--- a/demos/shared/test_runner/run_demo_tests.sh
+++ b/demos/shared/test_runner/run_demo_tests.sh
@@ -19,7 +19,8 @@ run_hurl_with_retries() {
   local max_attempts=1
   local attempt=1
 
-  if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
+  if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
+    || [ "$demo_name" = "advanced/currency_exchange" ]; then
     max_attempts=3
   fi
 