mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
fix(llm_gateway): buffer non-streaming response body until end_of_stream
Wait for the full upstream body before JSON parsing to avoid truncated responses on chunked replies. Retry currency_exchange demo tests on flake. Co-authored-by: Musa <musa@spherrrical.dev>
This commit is contained in:
parent
e5f0cee39e
commit
420431b30c
2 changed files with 8 additions and 1 deletion
|
|
@@ -1183,6 +1183,12 @@ impl HttpContext for StreamContext {
|
|||
return Action::Continue;
|
||||
}
|
||||
|
||||
// Non-streaming upstream responses may arrive in multiple chunks; wait for the
|
||||
// full buffered body before parsing (body_size is cumulative on the final chunk).
|
||||
if !self.streaming_response && !end_of_stream {
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
// Check if this is an error response from upstream
|
||||
if let Some(status_code) = &self.upstream_status_code {
|
||||
if status_code.is_client_error() || status_code.is_server_error() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue