Fix request closures during long-running streaming (#899)

2026-05-21 13:55:15 +02:00 · 2026-04-18 21:20:34 -07:00 · 2026-04-18 21:20:34 -07:00 · c3c213b2fd
commit c3c213b2fd
parent 78d8c90184
1 changed files with 7 additions and 2 deletions
--- a/cli/planoai/utils.py
+++ b/cli/planoai/utils.py
@@ -91,7 +91,12 @@ def convert_legacy_listeners(
        "type": "model",
        "port": 12000,
        "address": "0.0.0.0",
-        "timeout": "30s",
+        # LLM streaming responses routinely exceed 30s (extended thinking,
+        # long tool reasoning, large completions). Match the 300s ceiling
+        # used by the direct upstream-provider routes so Envoy doesn't
+        # abort streams with UT mid-response. Users can override via their
+        # plano_config.yaml `listeners.timeout` field.
+        "timeout": "300s",
        "model_providers": model_providers or [],
    }
@@ -100,7 +105,7 @@ def convert_legacy_listeners(
        "type": "prompt",
        "port": 10000,
        "address": "0.0.0.0",
-        "timeout": "30s",
+        "timeout": "300s",
    }
    # Handle None case