From 062825f26ed35f3c3606a554cab4c4a867991432 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 28 Jan 2026 20:31:01 -0800 Subject: [PATCH] add envoy retries (#712) * add envoy retries * add missing file * fix tests --------- Co-authored-by: Adil Hafeez --- cli/planoai/utils.py | 2 +- config/arch_config_schema.yaml | 2 ++ config/envoy.template.yaml | 14 ++++++++++++-- demos/use_cases/llm_routing/config.yaml | 1 + .../arch_config_full_reference_rendered.yaml | 17 ++++++----------- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py index ea4fe238..ea9b8dbd 100644 --- a/cli/planoai/utils.py +++ b/cli/planoai/utils.py @@ -128,7 +128,7 @@ def convert_legacy_listeners( model_provider_set = False for listener in listeners: - if listener.get("type") == "model_listener": + if listener.get("type") == "model": if model_provider_set: raise ValueError( "Currently only one listener can have model_providers set" diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml index a147c1ea..003bb9b4 100644 --- a/config/arch_config_schema.yaml +++ b/config/arch_config_schema.yaml @@ -66,6 +66,8 @@ properties: type: string enum: - plano_orchestrator_v1 + max_retries: + type: integer type: type: string enum: diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index 54fbeb77..ed9254ea 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -413,7 +413,7 @@ static_resources: "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog path: "/var/log/access_llm.log" format: | - [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" + [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT% route_config: name: local_routes virtual_hosts: @@ -534,7 +534,7 @@ static_resources: "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog path: "/var/log/access_llm.log" format: | - [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" + [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT% route_config: name: local_routes virtual_hosts: @@ -559,6 +559,16 @@ static_resources: auto_host_rewrite: true cluster: {{ llm_cluster_name }} timeout: 300s + {% if llm_gateway_listener.max_retries %} + retry_policy: + retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes" + num_retries: {{ llm_gateway_listener.max_retries }} + per_try_timeout: 30s + retriable_status_codes: [429, 500, 502, 503, 504] + retry_back_off: + base_interval: 0.5s + max_interval: 5s + {% endif %} {% endfor %} - match: prefix: "/" diff --git a/demos/use_cases/llm_routing/config.yaml b/demos/use_cases/llm_routing/config.yaml index c96e7d02..92769648 100644 --- a/demos/use_cases/llm_routing/config.yaml +++ b/demos/use_cases/llm_routing/config.yaml @@ -5,6 +5,7 @@ listeners: name: model_1 address: 0.0.0.0 port: 12000 + max_retries: 3 model_providers: diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index a33878b6..62e7ab96 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -37,14 +37,6 @@ listeners: port: 8001 router: plano_orchestrator_v1 type: agent -- address: 0.0.0.0 - name: model_1 - port: 12000 - type: model -- address: 0.0.0.0 - name: prompt_function_listener - port: 10000 - type: prompt - address: 0.0.0.0 model_providers: - access_key: $OPENAI_API_KEY @@ -73,10 +65,13 @@ listeners: port: 443 protocol: https provider_interface: openai - name: egress_traffic + name: model_1 port: 12000 - timeout: 30s - type: model_listener + type: model +- address: 0.0.0.0 + name: prompt_function_listener + port: 10000 + type: prompt model_aliases: fast-llm: target: gpt-4o-mini