fix: make upstream timeout configurable and increase default to 300s (#787)

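Hardcoded 30s timeouts in the Envoy config (the agent listener route default and the LLM route's per_try_timeout) caused premature termination of long-running LLM requests (tool use, agentic workflows). Make the timeouts configurable via the upstream_timeout_ms override (given in milliseconds and truncated to whole seconds) and raise the default to 300s.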
2026-06-08 14:55:14 +02:00 · 2026-03-04 18:53:32 -06:00 · 2026-03-04 18:53:32 -06:00 · 00bd11061e
commit 00bd11061e
parent 0c7b999770
2 changed files with 12 additions and 3 deletions
--- a/cli/planoai/config_generator.py
+++ b/cli/planoai/config_generator.py
@ -466,6 +466,15 @@ def validate_and_render_schema():
        "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt"
    )

+    upstream_timeout_ms = overrides.get("upstream_timeout_ms")
+    if upstream_timeout_ms is not None:
+        timeout_s = f"{int(upstream_timeout_ms) // 1000}s"
+        llm_gateway["timeout"] = timeout_s
+        prompt_gateway["timeout"] = timeout_s
+        for listener in listeners:
+            if listener.get("type") == "agent" and "timeout" not in listener:
+                listener["timeout"] = timeout_s
+
    data = {
        "prompt_gateway_listener": prompt_gateway,
        "llm_gateway_listener": llm_gateway,
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@ -336,7 +336,7 @@ static_resources:
                            auto_host_rewrite: true
                            prefix_rewrite: "/agents/"
                            cluster: bright_staff
-                            timeout: {{ listener.timeout | default('30s') }}
+                            timeout: {{ listener.timeout | default('300s') }}
                http_filters:
                  - name: envoy.filters.http.compressor
                    typed_config:
@ -517,12 +517,12 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
-                            timeout: 300s
+                            timeout: {{ llm_gateway_listener.timeout }}
                            {% if llm_gateway_listener.max_retries %}
                            retry_policy:
                              retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
                              num_retries: {{ llm_gateway_listener.max_retries }}
-                              per_try_timeout: 30s
+                              per_try_timeout: {{ llm_gateway_listener.timeout }}
                              retriable_status_codes: [429, 500, 502, 503, 504]
                              retry_back_off:
                                base_interval: 0.5s