add envoy retries (#712)

* add envoy retries

* add missing file

* fix tests

---------

Co-authored-by: Adil Hafeez <adil.hafeez10@t-mobile.com>
This commit is contained in:
Adil Hafeez 2026-01-28 20:31:01 -08:00 committed by GitHub
parent 2a36dd7376
commit 062825f26e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 22 additions and 14 deletions

View file

@@ -128,7 +128,7 @@ def convert_legacy_listeners(
model_provider_set = False model_provider_set = False
for listener in listeners: for listener in listeners:
if listener.get("type") == "model_listener": if listener.get("type") == "model":
if model_provider_set: if model_provider_set:
raise ValueError( raise ValueError(
"Currently only one listener can have model_providers set" "Currently only one listener can have model_providers set"

View file

@@ -66,6 +66,8 @@ properties:
type: string type: string
enum: enum:
- plano_orchestrator_v1 - plano_orchestrator_v1
max_retries:
type: integer
type: type:
type: string type: string
enum: enum:

View file

@@ -413,7 +413,7 @@ static_resources:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: "/var/log/access_llm.log" path: "/var/log/access_llm.log"
format: | format: |
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
route_config: route_config:
name: local_routes name: local_routes
virtual_hosts: virtual_hosts:
@@ -534,7 +534,7 @@ static_resources:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: "/var/log/access_llm.log" path: "/var/log/access_llm.log"
format: | format: |
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
route_config: route_config:
name: local_routes name: local_routes
virtual_hosts: virtual_hosts:
@@ -559,6 +559,16 @@ static_resources:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ llm_cluster_name }} cluster: {{ llm_cluster_name }}
timeout: 300s timeout: 300s
{% if llm_gateway_listener.max_retries %}
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
num_retries: {{ llm_gateway_listener.max_retries }}
per_try_timeout: 30s
retriable_status_codes: [429, 500, 502, 503, 504]
retry_back_off:
base_interval: 0.5s
max_interval: 5s
{% endif %}
{% endfor %} {% endfor %}
- match: - match:
prefix: "/" prefix: "/"

View file

@@ -5,6 +5,7 @@ listeners:
name: model_1 name: model_1
address: 0.0.0.0 address: 0.0.0.0
port: 12000 port: 12000
max_retries: 3
model_providers: model_providers:

View file

@@ -37,14 +37,6 @@ listeners:
port: 8001 port: 8001
router: plano_orchestrator_v1 router: plano_orchestrator_v1
type: agent type: agent
- address: 0.0.0.0
name: model_1
port: 12000
type: model
- address: 0.0.0.0
name: prompt_function_listener
port: 10000
type: prompt
- address: 0.0.0.0 - address: 0.0.0.0
model_providers: model_providers:
- access_key: $OPENAI_API_KEY - access_key: $OPENAI_API_KEY
@@ -73,10 +65,13 @@ listeners:
port: 443 port: 443
protocol: https protocol: https
provider_interface: openai provider_interface: openai
name: egress_traffic name: model_1
port: 12000 port: 12000
timeout: 30s type: model
type: model_listener - address: 0.0.0.0
name: prompt_function_listener
port: 10000
type: prompt
model_aliases: model_aliases:
fast-llm: fast-llm:
target: gpt-4o-mini target: gpt-4o-mini