mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
fix(llm_gateway): buffer non-streaming response body until end_of_stream
Wait for the full upstream body before JSON parsing to avoid truncated responses on chunked replies.
Retry currency_exchange demo tests on flake.

Co-authored-by: Musa <musa@spherrrical.dev>
This commit is contained in:
parent
e5f0cee39e
commit
420431b30c
2 changed files with 8 additions and 1 deletion
|
|
@@ -1183,6 +1183,12 @@ impl HttpContext for StreamContext {
|
|||
return Action::Continue;
|
||||
}
|
||||
|
||||
// Non-streaming upstream responses may arrive in multiple chunks; wait for the
|
||||
// full buffered body before parsing (body_size is cumulative on the final chunk).
|
||||
if !self.streaming_response && !end_of_stream {
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
// Check if this is an error response from upstream
|
||||
if let Some(status_code) = &self.upstream_status_code {
|
||||
if status_code.is_client_error() || status_code.is_server_error() {
|
||||
|
|
|
|||
|
|
@@ -19,7 +19,8 @@ run_hurl_with_retries() {
|
|||
local max_attempts=1
|
||||
local attempt=1
|
||||
|
||||
if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
|
||||
if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
|
||||
|| [ "$demo_name" = "advanced/currency_exchange" ]; then
|
||||
max_attempts=3
|
||||
fi
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue