diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py index 8f73bf18..214fd0a3 100644 --- a/cli/planoai/utils.py +++ b/cli/planoai/utils.py @@ -91,7 +91,12 @@ def convert_legacy_listeners( "type": "model", "port": 12000, "address": "0.0.0.0", - "timeout": "30s", + # LLM streaming responses routinely exceed 30s (extended thinking, + # long tool reasoning, large completions). Match the 300s ceiling + # used by the direct upstream-provider routes so Envoy doesn't + # abort streams mid-response with a UT (upstream request timeout) + # flag. Users can override via `listeners.timeout` in plano_config.yaml. + "timeout": "300s", "model_providers": model_providers or [], } @@ -100,7 +105,7 @@ def convert_legacy_listeners( "type": "prompt", "port": 10000, "address": "0.0.0.0", - "timeout": "30s", + "timeout": "300s", } # Handle None case