From 062825f26ed35f3c3606a554cab4c4a867991432 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Wed, 28 Jan 2026 20:31:01 -0800
Subject: [PATCH] add envoy retries (#712)

* add envoy retries (optional listener `max_retries` setting, rendered as an Envoy route `retry_policy`)

* add missing file

* fix tests

---------

Co-authored-by: Adil Hafeez <adil.hafeez10@t-mobile.com>
---
 cli/planoai/utils.py                            |  2 +-
 config/arch_config_schema.yaml                  |  2 ++
 config/envoy.template.yaml                      | 14 ++++++++++++--
 demos/use_cases/llm_routing/config.yaml         |  1 +
 .../arch_config_full_reference_rendered.yaml    | 17 ++++++-----------
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py
index ea4fe238..ea9b8dbd 100644
--- a/cli/planoai/utils.py
+++ b/cli/planoai/utils.py
@@ -128,7 +128,7 @@ def convert_legacy_listeners(
 
     model_provider_set = False
     for listener in listeners:
-        if listener.get("type") == "model_listener":
+        if listener.get("type") == "model":
             if model_provider_set:
                 raise ValueError(
                     "Currently only one listener can have model_providers set"
diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml
index a147c1ea..003bb9b4 100644
--- a/config/arch_config_schema.yaml
+++ b/config/arch_config_schema.yaml
@@ -66,6 +66,8 @@ properties:
               type: string
               enum:
                 - plano_orchestrator_v1
+            max_retries:
+              type: integer
             type:
               type: string
               enum:
diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml
index 54fbeb77..ed9254ea 100644
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@@ -413,7 +413,7 @@ static_resources:
                     "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                     path: "/var/log/access_llm.log"
                     format: |
-                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
+                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                 route_config:
                   name: local_routes
                   virtual_hosts:
@@ -534,7 +534,7 @@ static_resources:
                     "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                     path: "/var/log/access_llm.log"
                     format: |
-                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
+                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                 route_config:
                   name: local_routes
                   virtual_hosts:
@@ -559,6 +559,16 @@ static_resources:
                             auto_host_rewrite: true
                             cluster: {{ llm_cluster_name }}
                             timeout: 300s
+                            {% if llm_gateway_listener.max_retries %}
+                            retry_policy:
+                              retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
+                              num_retries: {{ llm_gateway_listener.max_retries }}
+                              per_try_timeout: 30s
+                              retriable_status_codes: [429, 500, 502, 503, 504]
+                              retry_back_off:
+                                base_interval: 0.5s
+                                max_interval: 5s
+                            {% endif %}
                       {% endfor %}
                         - match:
                             prefix: "/"
diff --git a/demos/use_cases/llm_routing/config.yaml b/demos/use_cases/llm_routing/config.yaml
index c96e7d02..92769648 100644
--- a/demos/use_cases/llm_routing/config.yaml
+++ b/demos/use_cases/llm_routing/config.yaml
@@ -5,6 +5,7 @@ listeners:
     name: model_1
     address: 0.0.0.0
     port: 12000
+    max_retries: 3
 
 model_providers:
 
diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
index a33878b6..62e7ab96 100644
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@@ -37,14 +37,6 @@ listeners:
   port: 8001
   router: plano_orchestrator_v1
   type: agent
-- address: 0.0.0.0
-  name: model_1
-  port: 12000
-  type: model
-- address: 0.0.0.0
-  name: prompt_function_listener
-  port: 10000
-  type: prompt
 - address: 0.0.0.0
   model_providers:
   - access_key: $OPENAI_API_KEY
@@ -73,10 +65,13 @@ listeners:
     port: 443
     protocol: https
     provider_interface: openai
-  name: egress_traffic
+  name: model_1
   port: 12000
-  timeout: 30s
-  type: model_listener
+  type: model
+- address: 0.0.0.0
+  name: prompt_function_listener
+  port: 10000
+  type: prompt
 model_aliases:
   fast-llm:
     target: gpt-4o-mini