mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
Merge 18dbbd25f7 into 554a3d1f6a
This commit is contained in:
commit
3f2e423e68
6 changed files with 1460 additions and 3 deletions
|
|
@ -213,6 +213,183 @@ properties:
|
|||
required:
|
||||
- name
|
||||
- description
|
||||
retry_policy:
|
||||
type: object
|
||||
description: "Retry policy configuration. When not specified, no retry logic is enabled."
|
||||
properties:
|
||||
fallback_models:
|
||||
type: array
|
||||
description: "Ordered list of model identifiers to fallback to before using Provider_List."
|
||||
items:
|
||||
type: string
|
||||
default_strategy:
|
||||
type: string
|
||||
description: "Default retry strategy for unconfigured status codes. Default: different_provider."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
default_max_attempts:
|
||||
type: integer
|
||||
description: "Default max retry attempts for unconfigured status codes. Default: 2."
|
||||
minimum: 0
|
||||
on_status_codes:
|
||||
type: array
|
||||
description: "Per-status-code retry configuration."
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
codes:
|
||||
type: array
|
||||
description: "List of status codes as integers or range strings (e.g. '502-504')."
|
||||
items:
|
||||
anyOf:
|
||||
- type: integer
|
||||
minimum: 100
|
||||
maximum: 599
|
||||
- type: string
|
||||
description: "Range string in 'start-end' format (e.g. '502-504')."
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy for these status codes."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts for these status codes."
|
||||
minimum: 0
|
||||
additionalProperties: false
|
||||
required:
|
||||
- codes
|
||||
- strategy
|
||||
- max_attempts
|
||||
on_timeout:
|
||||
type: object
|
||||
description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
|
||||
properties:
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy for timeout errors."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts for timeout errors."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
required:
|
||||
- strategy
|
||||
- max_attempts
|
||||
on_high_latency:
|
||||
type: object
|
||||
description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
|
||||
properties:
|
||||
threshold_ms:
|
||||
type: integer
|
||||
description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
|
||||
minimum: 1
|
||||
measure:
|
||||
type: string
|
||||
description: "What latency metric to measure. Default: ttfb."
|
||||
enum:
|
||||
- ttfb
|
||||
- total
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy when latency threshold is exceeded."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts when latency threshold is exceeded."
|
||||
minimum: 1
|
||||
block_duration_seconds:
|
||||
type: integer
|
||||
description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
|
||||
minimum: 1
|
||||
scope:
|
||||
type: string
|
||||
description: "What to block: model-level or provider-level. Default: model."
|
||||
enum:
|
||||
- model
|
||||
- provider
|
||||
apply_to:
|
||||
type: string
|
||||
description: "Blocking scope: global or request-scoped. Default: global."
|
||||
enum:
|
||||
- global
|
||||
- request
|
||||
min_triggers:
|
||||
type: integer
|
||||
description: "Number of High_Latency_Events required before creating a block. Default: 1."
|
||||
minimum: 1
|
||||
trigger_window_seconds:
|
||||
type: integer
|
||||
description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
required:
|
||||
- threshold_ms
|
||||
- strategy
|
||||
- max_attempts
|
||||
- block_duration_seconds
|
||||
backoff:
|
||||
type: object
|
||||
description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
|
||||
properties:
|
||||
apply_to:
|
||||
type: string
|
||||
description: "REQUIRED. Determines when backoff delays are applied."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- global
|
||||
base_ms:
|
||||
type: integer
|
||||
description: "Base delay in milliseconds for exponential backoff. Default: 100."
|
||||
minimum: 1
|
||||
max_ms:
|
||||
type: integer
|
||||
description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
|
||||
minimum: 1
|
||||
jitter:
|
||||
type: boolean
|
||||
description: "Add random jitter to prevent thundering herd. Default: true."
|
||||
additionalProperties: false
|
||||
required:
|
||||
- apply_to
|
||||
retry_after_handling:
|
||||
type: object
|
||||
description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
|
||||
properties:
|
||||
scope:
|
||||
type: string
|
||||
description: "What to block: model-level or provider-level. Default: model."
|
||||
enum:
|
||||
- model
|
||||
- provider
|
||||
apply_to:
|
||||
type: string
|
||||
description: "Blocking scope: request-scoped or global. Default: global."
|
||||
enum:
|
||||
- request
|
||||
- global
|
||||
max_retry_after_seconds:
|
||||
type: integer
|
||||
description: "Maximum Retry-After value honored in seconds. Default: 300."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
max_retry_duration_ms:
|
||||
type: integer
|
||||
description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
|
||||
minimum: 0
|
||||
additionalProperties: false
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
|
|
@ -271,6 +448,183 @@ properties:
|
|||
required:
|
||||
- name
|
||||
- description
|
||||
retry_policy:
|
||||
type: object
|
||||
description: "Retry policy configuration. When not specified, no retry logic is enabled."
|
||||
properties:
|
||||
fallback_models:
|
||||
type: array
|
||||
description: "Ordered list of model identifiers to fallback to before using Provider_List."
|
||||
items:
|
||||
type: string
|
||||
default_strategy:
|
||||
type: string
|
||||
description: "Default retry strategy for unconfigured status codes. Default: different_provider."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
default_max_attempts:
|
||||
type: integer
|
||||
description: "Default max retry attempts for unconfigured status codes. Default: 2."
|
||||
minimum: 0
|
||||
on_status_codes:
|
||||
type: array
|
||||
description: "Per-status-code retry configuration."
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
codes:
|
||||
type: array
|
||||
description: "List of status codes as integers or range strings (e.g. '502-504')."
|
||||
items:
|
||||
anyOf:
|
||||
- type: integer
|
||||
minimum: 100
|
||||
maximum: 599
|
||||
- type: string
|
||||
description: "Range string in 'start-end' format (e.g. '502-504')."
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy for these status codes."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts for these status codes."
|
||||
minimum: 0
|
||||
additionalProperties: false
|
||||
required:
|
||||
- codes
|
||||
- strategy
|
||||
- max_attempts
|
||||
on_timeout:
|
||||
type: object
|
||||
description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
|
||||
properties:
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy for timeout errors."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts for timeout errors."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
required:
|
||||
- strategy
|
||||
- max_attempts
|
||||
on_high_latency:
|
||||
type: object
|
||||
description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
|
||||
properties:
|
||||
threshold_ms:
|
||||
type: integer
|
||||
description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
|
||||
minimum: 1
|
||||
measure:
|
||||
type: string
|
||||
description: "What latency metric to measure. Default: ttfb."
|
||||
enum:
|
||||
- ttfb
|
||||
- total
|
||||
strategy:
|
||||
type: string
|
||||
description: "Retry strategy when latency threshold is exceeded."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- different_provider
|
||||
max_attempts:
|
||||
type: integer
|
||||
description: "Max retry attempts when latency threshold is exceeded."
|
||||
minimum: 1
|
||||
block_duration_seconds:
|
||||
type: integer
|
||||
description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
|
||||
minimum: 1
|
||||
scope:
|
||||
type: string
|
||||
description: "What to block: model-level or provider-level. Default: model."
|
||||
enum:
|
||||
- model
|
||||
- provider
|
||||
apply_to:
|
||||
type: string
|
||||
description: "Blocking scope: global or request-scoped. Default: global."
|
||||
enum:
|
||||
- global
|
||||
- request
|
||||
min_triggers:
|
||||
type: integer
|
||||
description: "Number of High_Latency_Events required before creating a block. Default: 1."
|
||||
minimum: 1
|
||||
trigger_window_seconds:
|
||||
type: integer
|
||||
description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
required:
|
||||
- threshold_ms
|
||||
- strategy
|
||||
- max_attempts
|
||||
- block_duration_seconds
|
||||
backoff:
|
||||
type: object
|
||||
description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
|
||||
properties:
|
||||
apply_to:
|
||||
type: string
|
||||
description: "REQUIRED. Determines when backoff delays are applied."
|
||||
enum:
|
||||
- same_model
|
||||
- same_provider
|
||||
- global
|
||||
base_ms:
|
||||
type: integer
|
||||
description: "Base delay in milliseconds for exponential backoff. Default: 100."
|
||||
minimum: 1
|
||||
max_ms:
|
||||
type: integer
|
||||
description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
|
||||
minimum: 1
|
||||
jitter:
|
||||
type: boolean
|
||||
description: "Add random jitter to prevent thundering herd. Default: true."
|
||||
additionalProperties: false
|
||||
required:
|
||||
- apply_to
|
||||
retry_after_handling:
|
||||
type: object
|
||||
description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
|
||||
properties:
|
||||
scope:
|
||||
type: string
|
||||
description: "What to block: model-level or provider-level. Default: model."
|
||||
enum:
|
||||
- model
|
||||
- provider
|
||||
apply_to:
|
||||
type: string
|
||||
description: "Blocking scope: request-scoped or global. Default: global."
|
||||
enum:
|
||||
- request
|
||||
- global
|
||||
max_retry_after_seconds:
|
||||
type: integer
|
||||
description: "Maximum Retry-After value honored in seconds. Default: 300."
|
||||
minimum: 1
|
||||
additionalProperties: false
|
||||
max_retry_duration_ms:
|
||||
type: integer
|
||||
description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
|
||||
minimum: 0
|
||||
additionalProperties: false
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
|
|
|
|||
81
crates/Cargo.lock
generated
81
crates/Cargo.lock
generated
|
|
@ -293,7 +293,16 @@ version = "0.5.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
"bit-vec 0.6.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
||||
dependencies = [
|
||||
"bit-vec 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -302,6 +311,12 @@ version = "0.6.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.0"
|
||||
|
|
@ -528,6 +543,7 @@ dependencies = [
|
|||
"hyper 1.9.0",
|
||||
"log",
|
||||
"pretty_assertions",
|
||||
"proptest",
|
||||
"proxy-wasm",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
|
|
@ -928,7 +944,7 @@ version = "0.12.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"bit-set 0.5.3",
|
||||
"regex",
|
||||
]
|
||||
|
||||
|
|
@ -2527,6 +2543,25 @@ dependencies = [
|
|||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proptest"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744"
|
||||
dependencies = [
|
||||
"bit-set 0.8.0",
|
||||
"bit-vec 0.8.0",
|
||||
"bitflags",
|
||||
"num-traits",
|
||||
"rand 0.9.4",
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_xorshift",
|
||||
"regex-syntax",
|
||||
"rusty-fork",
|
||||
"tempfile",
|
||||
"unarray",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.14.3"
|
||||
|
|
@ -2575,6 +2610,12 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||
|
||||
[[package]]
|
||||
name = "quinn"
|
||||
version = "0.11.9"
|
||||
|
|
@ -2727,6 +2768,15 @@ version = "0.10.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
|
||||
dependencies = [
|
||||
"rand_core 0.9.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "11.6.0"
|
||||
|
|
@ -3056,6 +3106,18 @@ version = "1.0.22"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
||||
|
||||
[[package]]
|
||||
name = "rusty-fork"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"quick-error",
|
||||
"tempfile",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.23"
|
||||
|
|
@ -3984,6 +4046,12 @@ version = "1.19.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||
|
||||
[[package]]
|
||||
name = "unarray"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.9.0"
|
||||
|
|
@ -4133,6 +4201,15 @@ version = "0.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
|
||||
|
||||
[[package]]
|
||||
name = "wait-timeout"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.1"
|
||||
|
|
|
|||
|
|
@ -36,3 +36,4 @@ tokio = { version = "1.44", features = ["sync", "time", "macros", "rt"] }
|
|||
hyper = { version = "1.0", features = ["full"] }
|
||||
bytes = "1.0"
|
||||
http-body-util = "0.1"
|
||||
proptest = "1.4"
|
||||
|
|
|
|||
7
crates/common/proptest-regressions/configuration.txt
Normal file
7
crates/common/proptest-regressions/configuration.txt
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc e6443c9611ecf84b57514e7d12084d62e6558989f663f1106d3cedd746a20bf3 # shrinks to include_on_status_codes = false, include_backoff = true, include_retry_after = false, include_on_timeout = false, include_on_high_latency = false
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -278,6 +278,7 @@ mod tests {
|
|||
stream: None,
|
||||
passthrough_auth: None,
|
||||
headers: None,
|
||||
retry_policy: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue