add envoy retries (#712)

* add envoy retries
* add missing file
* fix tests
---------
Co-authored-by: Adil Hafeez <adil.hafeez10@t-mobile.com>
2026-05-21 13:55:15 +02:00 · 2026-01-28 20:31:01 -08:00 · 2026-01-28 20:31:01 -08:00 · 062825f26e
commit 062825f26e
parent 2a36dd7376
5 changed files with 22 additions and 14 deletions
--- a/cli/planoai/utils.py
+++ b/cli/planoai/utils.py
@@ -128,7 +128,7 @@ def convert_legacy_listeners(

    model_provider_set = False
    for listener in listeners:
-        if listener.get("type") == "model_listener":
+        if listener.get("type") == "model":
            if model_provider_set:
                raise ValueError(
                    "Currently only one listener can have model_providers set"
--- a/config/arch_config_schema.yaml
+++ b/config/arch_config_schema.yaml
@@ -66,6 +66,8 @@ properties:
              type: string
              enum:
                - plano_orchestrator_v1
+            max_retries:
+              type: integer
            type:
              type: string
              enum:
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@@ -413,7 +413,7 @@ static_resources:
                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                    path: "/var/log/access_llm.log"
                    format: |
-                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
+                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                route_config:
                  name: local_routes
                  virtual_hosts:
@@ -534,7 +534,7 @@ static_resources:
                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                    path: "/var/log/access_llm.log"
                    format: |
-                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
+                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
                route_config:
                  name: local_routes
                  virtual_hosts:
@@ -559,6 +559,16 @@ static_resources:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
                            timeout: 300s
+                            {% if llm_gateway_listener.max_retries %}
+                            retry_policy:
+                              retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
+                              num_retries: {{ llm_gateway_listener.max_retries }}
+                              per_try_timeout: 30s
+                              retriable_status_codes: [429, 500, 502, 503, 504]
+                              retry_back_off:
+                                base_interval: 0.5s
+                                max_interval: 5s
+                            {% endif %}
                      {% endfor %}
                        - match:
                            prefix: "/"
--- a/demos/use_cases/llm_routing/config.yaml
+++ b/demos/use_cases/llm_routing/config.yaml
@@ -5,6 +5,7 @@ listeners:
    name: model_1
    address: 0.0.0.0
    port: 12000
+    max_retries: 3

 model_providers:

--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@@ -37,14 +37,6 @@ listeners:
  port: 8001
  router: plano_orchestrator_v1
  type: agent
- address: 0.0.0.0
-  name: model_1
-  port: 12000
-  type: model
- address: 0.0.0.0
-  name: prompt_function_listener
-  port: 10000
-  type: prompt
 - address: 0.0.0.0
  model_providers:
  - access_key: $OPENAI_API_KEY
@@ -73,10 +65,13 @@ listeners:
    port: 443
    protocol: https
    provider_interface: openai
-  name: egress_traffic
+  name: model_1
  port: 12000
-  timeout: 30s
-  type: model_listener
+  type: model
+- address: 0.0.0.0
+  name: prompt_function_listener
+  port: 10000
+  type: prompt
 model_aliases:
  fast-llm:
    target: gpt-4o-mini