mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
fix: make upstream timeout configurable and increase default to 300s (#787)
Hardcoded 30s timeouts in the Envoy config caused premature termination of long-running LLM requests (tool use, agentic workflows). Make the timeouts configurable via the `upstream_timeout_ms` override and raise the default to 300s.
This commit is contained in:
parent
0c7b999770
commit
00bd11061e
2 changed files with 12 additions and 3 deletions
|
|
@@ -466,6 +466,15 @@ def validate_and_render_schema():
|
|||
"upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt"
|
||||
)
|
||||
|
||||
upstream_timeout_ms = overrides.get("upstream_timeout_ms")
|
||||
if upstream_timeout_ms is not None:
|
||||
timeout_s = f"{int(upstream_timeout_ms) // 1000}s"
|
||||
llm_gateway["timeout"] = timeout_s
|
||||
prompt_gateway["timeout"] = timeout_s
|
||||
for listener in listeners:
|
||||
if listener.get("type") == "agent" and "timeout" not in listener:
|
||||
listener["timeout"] = timeout_s
|
||||
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway,
|
||||
"llm_gateway_listener": llm_gateway,
|
||||
|
|
|
|||
|
|
@@ -336,7 +336,7 @@ static_resources:
|
|||
auto_host_rewrite: true
|
||||
prefix_rewrite: "/agents/"
|
||||
cluster: bright_staff
|
||||
timeout: {{ listener.timeout | default('30s') }}
|
||||
timeout: {{ listener.timeout | default('300s') }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
|
|
@@ -517,12 +517,12 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 300s
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
{% if llm_gateway_listener.max_retries %}
|
||||
retry_policy:
|
||||
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
|
||||
num_retries: {{ llm_gateway_listener.max_retries }}
|
||||
per_try_timeout: 30s
|
||||
per_try_timeout: {{ llm_gateway_listener.timeout }}
|
||||
retriable_status_codes: [429, 500, 502, 503, 504]
|
||||
retry_back_off:
|
||||
base_interval: 0.5s
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue