Fix request closures during long-running streaming (#899)

This commit is contained in:
Adil Hafeez 2026-04-18 21:20:34 -07:00 committed by GitHub
parent 78d8c90184
commit c3c213b2fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -91,7 +91,12 @@ def convert_legacy_listeners(
"type": "model", "type": "model",
"port": 12000, "port": 12000,
"address": "0.0.0.0", "address": "0.0.0.0",
"timeout": "30s", # LLM streaming responses routinely exceed 30s (extended thinking,
# long tool reasoning, large completions). Match the 300s ceiling
# used by the direct upstream-provider routes so Envoy doesn't
# abort streams with the UT (upstream request timeout) response flag mid-response. Users can override via their
# plano_config.yaml `listeners.timeout` field.
"timeout": "300s",
"model_providers": model_providers or [], "model_providers": model_providers or [],
} }
@@ -100,7 +105,7 @@ def convert_legacy_listeners(
"type": "prompt", "type": "prompt",
"port": 10000, "port": 10000,
"address": "0.0.0.0", "address": "0.0.0.0",
"timeout": "30s", "timeout": "300s",
} }
# Handle None case # Handle None case