mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
add envoy retries (#712)
* add envoy retries
* add missing file
* fix tests

---------

Co-authored-by: Adil Hafeez <adil.hafeez10@t-mobile.com>
This commit is contained in:
parent
2a36dd7376
commit
062825f26e
5 changed files with 22 additions and 14 deletions
|
|
@@ -128,7 +128,7 @@ def convert_legacy_listeners(
|
|||
|
||||
model_provider_set = False
|
||||
for listener in listeners:
|
||||
if listener.get("type") == "model_listener":
|
||||
if listener.get("type") == "model":
|
||||
if model_provider_set:
|
||||
raise ValueError(
|
||||
"Currently only one listener can have model_providers set"
|
||||
|
|
|
|||
|
|
@@ -66,6 +66,8 @@ properties:
|
|||
type: string
|
||||
enum:
|
||||
- plano_orchestrator_v1
|
||||
max_retries:
|
||||
type: integer
|
||||
type:
|
||||
type: string
|
||||
enum:
|
||||
|
|
|
|||
|
|
@@ -413,7 +413,7 @@ static_resources:
|
|||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
format: |
|
||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
|
||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
|
||||
route_config:
|
||||
name: local_routes
|
||||
virtual_hosts:
|
||||
|
|
@@ -534,7 +534,7 @@ static_resources:
|
|||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
format: |
|
||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
|
||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
|
||||
route_config:
|
||||
name: local_routes
|
||||
virtual_hosts:
|
||||
|
|
@@ -559,6 +559,16 @@ static_resources:
|
|||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 300s
|
||||
{% if llm_gateway_listener.max_retries %}
|
||||
retry_policy:
|
||||
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
|
||||
num_retries: {{ llm_gateway_listener.max_retries }}
|
||||
per_try_timeout: 30s
|
||||
retriable_status_codes: [429, 500, 502, 503, 504]
|
||||
retry_back_off:
|
||||
base_interval: 0.5s
|
||||
max_interval: 5s
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
|
|
|
|||
|
|
@@ -5,6 +5,7 @@ listeners:
|
|||
name: model_1
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
max_retries: 3
|
||||
|
||||
model_providers:
|
||||
|
||||
|
|
|
|||
|
|
@@ -37,14 +37,6 @@ listeners:
|
|||
port: 8001
|
||||
router: plano_orchestrator_v1
|
||||
type: agent
|
||||
- address: 0.0.0.0
|
||||
name: model_1
|
||||
port: 12000
|
||||
type: model
|
||||
- address: 0.0.0.0
|
||||
name: prompt_function_listener
|
||||
port: 10000
|
||||
type: prompt
|
||||
- address: 0.0.0.0
|
||||
model_providers:
|
||||
- access_key: $OPENAI_API_KEY
|
||||
|
|
@@ -73,10 +65,13 @@ listeners:
|
|||
port: 443
|
||||
protocol: https
|
||||
provider_interface: openai
|
||||
name: egress_traffic
|
||||
name: model_1
|
||||
port: 12000
|
||||
timeout: 30s
|
||||
type: model_listener
|
||||
type: model
|
||||
- address: 0.0.0.0
|
||||
name: prompt_function_listener
|
||||
port: 10000
|
||||
type: prompt
|
||||
model_aliases:
|
||||
fast-llm:
|
||||
target: gpt-4o-mini
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue