fix(llm_gateway): buffer non-streaming response body until end_of_stream

Wait until end_of_stream before JSON-parsing a non-streaming upstream body, so
a response delivered in multiple chunks is not parsed while still truncated.
Also retry the advanced/currency_exchange demo tests on flake.

Co-authored-by: Musa <musa@spherrrical.dev>
This commit is contained in:
Cursor Agent 2026-05-18 18:11:22 +00:00
parent e5f0cee39e
commit 420431b30c
No known key found for this signature in database
2 changed files with 8 additions and 1 deletion

View file

@ -1183,6 +1183,12 @@ impl HttpContext for StreamContext {
return Action::Continue;
}
// Non-streaming upstream responses may arrive in multiple chunks; wait for the
// full buffered body before parsing (body_size is cumulative on the final chunk).
if !self.streaming_response && !end_of_stream {
return Action::Continue;
}
// Check if this is an error response from upstream
if let Some(status_code) = &self.upstream_status_code {
if status_code.is_client_error() || status_code.is_server_error() {

View file

@ -19,7 +19,8 @@ run_hurl_with_retries() {
local max_attempts=1
local attempt=1
if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
|| [ "$demo_name" = "advanced/currency_exchange" ]; then
max_attempts=3
fi