fix: make upstream timeout configurable and increase default to 300s (#787)

Hardcoded 30s timeouts in the Envoy config caused premature termination of
long-running LLM requests (tool use, agentic workflows). Make the timeouts
configurable via the upstream_timeout_ms override and raise the default to 300s.
This commit is contained in:
Syed Hashmi 2026-03-04 18:53:32 -06:00
parent 0c7b999770
commit 00bd11061e
2 changed files with 12 additions and 3 deletions

View file

@@ -336,7 +336,7 @@ static_resources:
auto_host_rewrite: true
prefix_rewrite: "/agents/"
cluster: bright_staff
-timeout: {{ listener.timeout | default('30s') }}
+timeout: {{ listener.timeout | default('300s') }}
http_filters:
- name: envoy.filters.http.compressor
typed_config:
@@ -517,12 +517,12 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ llm_cluster_name }}
-timeout: 300s
+timeout: {{ llm_gateway_listener.timeout }}
{% if llm_gateway_listener.max_retries %}
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
num_retries: {{ llm_gateway_listener.max_retries }}
-per_try_timeout: 30s
+per_try_timeout: {{ llm_gateway_listener.timeout }}
retriable_status_codes: [429, 500, 502, 503, 504]
retry_back_off:
base_interval: 0.5s