mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
add envoy retries (#712)
* add envoy retries
* add missing file
* fix tests

---------

Co-authored-by: Adil Hafeez <adil.hafeez10@t-mobile.com>
This commit is contained in:
parent
2a36dd7376
commit
062825f26e
5 changed files with 22 additions and 14 deletions
|
|
@@ -128,7 +128,7 @@ def convert_legacy_listeners(
|
||||||
|
|
||||||
model_provider_set = False
|
model_provider_set = False
|
||||||
for listener in listeners:
|
for listener in listeners:
|
||||||
if listener.get("type") == "model_listener":
|
if listener.get("type") == "model":
|
||||||
if model_provider_set:
|
if model_provider_set:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Currently only one listener can have model_providers set"
|
"Currently only one listener can have model_providers set"
|
||||||
|
|
|
||||||
|
|
@@ -66,6 +66,8 @@ properties:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
- plano_orchestrator_v1
|
- plano_orchestrator_v1
|
||||||
|
max_retries:
|
||||||
|
type: integer
|
||||||
type:
|
type:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
|
|
|
||||||
|
|
@@ -413,7 +413,7 @@ static_resources:
|
||||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||||
path: "/var/log/access_llm.log"
|
path: "/var/log/access_llm.log"
|
||||||
format: |
|
format: |
|
||||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
|
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
|
||||||
route_config:
|
route_config:
|
||||||
name: local_routes
|
name: local_routes
|
||||||
virtual_hosts:
|
virtual_hosts:
|
||||||
|
|
@@ -534,7 +534,7 @@ static_resources:
|
||||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||||
path: "/var/log/access_llm.log"
|
path: "/var/log/access_llm.log"
|
||||||
format: |
|
format: |
|
||||||
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
|
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" attempts=%UPSTREAM_REQUEST_ATTEMPT_COUNT%
|
||||||
route_config:
|
route_config:
|
||||||
name: local_routes
|
name: local_routes
|
||||||
virtual_hosts:
|
virtual_hosts:
|
||||||
|
|
@@ -559,6 +559,16 @@ static_resources:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ llm_cluster_name }}
|
cluster: {{ llm_cluster_name }}
|
||||||
timeout: 300s
|
timeout: 300s
|
||||||
|
{% if llm_gateway_listener.max_retries %}
|
||||||
|
retry_policy:
|
||||||
|
retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes"
|
||||||
|
num_retries: {{ llm_gateway_listener.max_retries }}
|
||||||
|
per_try_timeout: 30s
|
||||||
|
retriable_status_codes: [429, 500, 502, 503, 504]
|
||||||
|
retry_back_off:
|
||||||
|
base_interval: 0.5s
|
||||||
|
max_interval: 5s
|
||||||
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
- match:
|
- match:
|
||||||
prefix: "/"
|
prefix: "/"
|
||||||
|
|
|
||||||
|
|
@@ -5,6 +5,7 @@ listeners:
|
||||||
name: model_1
|
name: model_1
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
port: 12000
|
port: 12000
|
||||||
|
max_retries: 3
|
||||||
|
|
||||||
model_providers:
|
model_providers:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -37,14 +37,6 @@ listeners:
|
||||||
port: 8001
|
port: 8001
|
||||||
router: plano_orchestrator_v1
|
router: plano_orchestrator_v1
|
||||||
type: agent
|
type: agent
|
||||||
- address: 0.0.0.0
|
|
||||||
name: model_1
|
|
||||||
port: 12000
|
|
||||||
type: model
|
|
||||||
- address: 0.0.0.0
|
|
||||||
name: prompt_function_listener
|
|
||||||
port: 10000
|
|
||||||
type: prompt
|
|
||||||
- address: 0.0.0.0
|
- address: 0.0.0.0
|
||||||
model_providers:
|
model_providers:
|
||||||
- access_key: $OPENAI_API_KEY
|
- access_key: $OPENAI_API_KEY
|
||||||
|
|
@@ -73,10 +65,13 @@ listeners:
|
||||||
port: 443
|
port: 443
|
||||||
protocol: https
|
protocol: https
|
||||||
provider_interface: openai
|
provider_interface: openai
|
||||||
name: egress_traffic
|
name: model_1
|
||||||
port: 12000
|
port: 12000
|
||||||
timeout: 30s
|
type: model
|
||||||
type: model_listener
|
- address: 0.0.0.0
|
||||||
|
name: prompt_function_listener
|
||||||
|
port: 10000
|
||||||
|
type: prompt
|
||||||
model_aliases:
|
model_aliases:
|
||||||
fast-llm:
|
fast-llm:
|
||||||
target: gpt-4o-mini
|
target: gpt-4o-mini
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue