This commit is contained in:
Troy 2026-05-31 00:21:30 +08:00 committed by GitHub
commit 3f2e423e68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1460 additions and 3 deletions

View file

@ -213,6 +213,183 @@ properties:
required:
- name
- description
retry_policy:
type: object
description: "Retry policy configuration. When not specified, no retry logic is enabled."
properties:
fallback_models:
type: array
description: "Ordered list of model identifiers to fallback to before using Provider_List."
items:
type: string
default_strategy:
type: string
description: "Default retry strategy for unconfigured status codes. Default: different_provider."
enum:
- same_model
- same_provider
- different_provider
default_max_attempts:
type: integer
description: "Default max retry attempts for unconfigured status codes. Default: 2."
minimum: 0
on_status_codes:
type: array
description: "Per-status-code retry configuration."
items:
type: object
properties:
codes:
type: array
description: "List of status codes as integers or range strings (e.g. '502-504')."
items:
anyOf:
- type: integer
minimum: 100
maximum: 599
- type: string
description: "Range string in 'start-end' format (e.g. '502-504')."
strategy:
type: string
description: "Retry strategy for these status codes."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts for these status codes."
minimum: 0
additionalProperties: false
required:
- codes
- strategy
- max_attempts
on_timeout:
type: object
description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
properties:
strategy:
type: string
description: "Retry strategy for timeout errors."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts for timeout errors."
minimum: 1
additionalProperties: false
required:
- strategy
- max_attempts
on_high_latency:
type: object
description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
properties:
threshold_ms:
type: integer
description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
minimum: 1
measure:
type: string
description: "What latency metric to measure. Default: ttfb."
enum:
- ttfb
- total
strategy:
type: string
description: "Retry strategy when latency threshold is exceeded."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts when latency threshold is exceeded."
minimum: 1
block_duration_seconds:
type: integer
description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
minimum: 1
scope:
type: string
description: "What to block: model-level or provider-level. Default: model."
enum:
- model
- provider
apply_to:
type: string
description: "Blocking scope: global or request-scoped. Default: global."
enum:
- global
- request
min_triggers:
type: integer
description: "Number of High_Latency_Events required before creating a block. Default: 1."
minimum: 1
trigger_window_seconds:
type: integer
description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
minimum: 1
additionalProperties: false
required:
- threshold_ms
- strategy
- max_attempts
- block_duration_seconds
backoff:
type: object
description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
properties:
apply_to:
type: string
description: "REQUIRED. Determines when backoff delays are applied."
enum:
- same_model
- same_provider
- global
base_ms:
type: integer
description: "Base delay in milliseconds for exponential backoff. Default: 100."
minimum: 1
max_ms:
type: integer
description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
minimum: 1
jitter:
type: boolean
description: "Add random jitter to prevent thundering herd. Default: true."
additionalProperties: false
required:
- apply_to
retry_after_handling:
type: object
description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
properties:
scope:
type: string
description: "What to block: model-level or provider-level. Default: model."
enum:
- model
- provider
apply_to:
type: string
description: "Blocking scope: request-scoped or global. Default: global."
enum:
- request
- global
max_retry_after_seconds:
type: integer
description: "Maximum Retry-After value honored in seconds. Default: 300."
minimum: 1
additionalProperties: false
max_retry_duration_ms:
type: integer
description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
minimum: 0
additionalProperties: false
additionalProperties: false
required:
- model
@ -271,6 +448,183 @@ properties:
required:
- name
- description
retry_policy:
type: object
description: "Retry policy configuration. When not specified, no retry logic is enabled."
properties:
fallback_models:
type: array
description: "Ordered list of model identifiers to fallback to before using Provider_List."
items:
type: string
default_strategy:
type: string
description: "Default retry strategy for unconfigured status codes. Default: different_provider."
enum:
- same_model
- same_provider
- different_provider
default_max_attempts:
type: integer
description: "Default max retry attempts for unconfigured status codes. Default: 2."
minimum: 0
on_status_codes:
type: array
description: "Per-status-code retry configuration."
items:
type: object
properties:
codes:
type: array
description: "List of status codes as integers or range strings (e.g. '502-504')."
items:
anyOf:
- type: integer
minimum: 100
maximum: 599
- type: string
description: "Range string in 'start-end' format (e.g. '502-504')."
strategy:
type: string
description: "Retry strategy for these status codes."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts for these status codes."
minimum: 0
additionalProperties: false
required:
- codes
- strategy
- max_attempts
on_timeout:
type: object
description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
properties:
strategy:
type: string
description: "Retry strategy for timeout errors."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts for timeout errors."
minimum: 1
additionalProperties: false
required:
- strategy
- max_attempts
on_high_latency:
type: object
description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
properties:
threshold_ms:
type: integer
description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
minimum: 1
measure:
type: string
description: "What latency metric to measure. Default: ttfb."
enum:
- ttfb
- total
strategy:
type: string
description: "Retry strategy when latency threshold is exceeded."
enum:
- same_model
- same_provider
- different_provider
max_attempts:
type: integer
description: "Max retry attempts when latency threshold is exceeded."
minimum: 1
block_duration_seconds:
type: integer
description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
minimum: 1
scope:
type: string
description: "What to block: model-level or provider-level. Default: model."
enum:
- model
- provider
apply_to:
type: string
description: "Blocking scope: global or request-scoped. Default: global."
enum:
- global
- request
min_triggers:
type: integer
description: "Number of High_Latency_Events required before creating a block. Default: 1."
minimum: 1
trigger_window_seconds:
type: integer
description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
minimum: 1
additionalProperties: false
required:
- threshold_ms
- strategy
- max_attempts
- block_duration_seconds
backoff:
type: object
description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
properties:
apply_to:
type: string
description: "REQUIRED. Determines when backoff delays are applied."
enum:
- same_model
- same_provider
- global
base_ms:
type: integer
description: "Base delay in milliseconds for exponential backoff. Default: 100."
minimum: 1
max_ms:
type: integer
description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
minimum: 1
jitter:
type: boolean
description: "Add random jitter to prevent thundering herd. Default: true."
additionalProperties: false
required:
- apply_to
retry_after_handling:
type: object
description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
properties:
scope:
type: string
description: "What to block: model-level or provider-level. Default: model."
enum:
- model
- provider
apply_to:
type: string
description: "Blocking scope: request-scoped or global. Default: global."
enum:
- request
- global
max_retry_after_seconds:
type: integer
description: "Maximum Retry-After value honored in seconds. Default: 300."
minimum: 1
additionalProperties: false
max_retry_duration_ms:
type: integer
description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
minimum: 0
additionalProperties: false
additionalProperties: false
required:
- model

81
crates/Cargo.lock generated
View file

@ -293,7 +293,16 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
"bit-vec 0.6.3",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec 0.8.0",
]
[[package]]
@ -302,6 +311,12 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bitflags"
version = "2.11.0"
@ -528,6 +543,7 @@ dependencies = [
"hyper 1.9.0",
"log",
"pretty_assertions",
"proptest",
"proxy-wasm",
"rand 0.8.5",
"serde",
@ -928,7 +944,7 @@ version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
dependencies = [
"bit-set",
"bit-set 0.5.3",
"regex",
]
@ -2527,6 +2543,25 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "proptest"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744"
dependencies = [
"bit-set 0.8.0",
"bit-vec 0.8.0",
"bitflags",
"num-traits",
"rand 0.9.4",
"rand_chacha 0.9.0",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "prost"
version = "0.14.3"
@ -2575,6 +2610,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quinn"
version = "0.11.9"
@ -2727,6 +2768,15 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
[[package]]
name = "rand_xorshift"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
dependencies = [
"rand_core 0.9.5",
]
[[package]]
name = "raw-cpuid"
version = "11.6.0"
@ -3056,6 +3106,18 @@ version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "rusty-fork"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
dependencies = [
"fnv",
"quick-error",
"tempfile",
"wait-timeout",
]
[[package]]
name = "ryu"
version = "1.0.23"
@ -3984,6 +4046,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "unicase"
version = "2.9.0"
@ -4133,6 +4201,15 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "want"
version = "0.3.1"

View file

@ -36,3 +36,4 @@ tokio = { version = "1.44", features = ["sync", "time", "macros", "rt"] }
hyper = { version = "1.0", features = ["full"] }
bytes = "1.0"
http-body-util = "0.1"
proptest = "1.4"

View file

@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc e6443c9611ecf84b57514e7d12084d62e6558989f663f1106d3cedd746a20bf3 # shrinks to include_on_status_codes = false, include_backoff = true, include_retry_after = false, include_on_timeout = false, include_on_high_latency = false

File diff suppressed because it is too large Load diff

View file

@ -278,6 +278,7 @@ mod tests {
stream: None,
passthrough_auth: None,
headers: None,
retry_policy: None,
}
}