diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 9560b437..7de77db2 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -213,6 +213,183 @@ properties: required: - name - description + retry_policy: + type: object + description: "Retry policy configuration. When not specified, no retry logic is enabled." + properties: + fallback_models: + type: array + description: "Ordered list of model identifiers to fallback to before using Provider_List." + items: + type: string + default_strategy: + type: string + description: "Default retry strategy for unconfigured status codes. Default: different_provider." + enum: + - same_model + - same_provider + - different_provider + default_max_attempts: + type: integer + description: "Default max retry attempts for unconfigured status codes. Default: 2." + minimum: 0 + on_status_codes: + type: array + description: "Per-status-code retry configuration." + items: + type: object + properties: + codes: + type: array + description: "List of status codes as integers or range strings (e.g. '502-504')." + items: + anyOf: + - type: integer + minimum: 100 + maximum: 599 + - type: string + description: "Range string in 'start-end' format (e.g. '502-504')." + strategy: + type: string + description: "Retry strategy for these status codes." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts for these status codes." + minimum: 0 + additionalProperties: false + required: + - codes + - strategy + - max_attempts + on_timeout: + type: object + description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts." + properties: + strategy: + type: string + description: "Retry strategy for timeout errors." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts for timeout errors." + minimum: 1 + additionalProperties: false + required: + - strategy + - max_attempts + on_high_latency: + type: object + description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed." + properties: + threshold_ms: + type: integer + description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered." + minimum: 1 + measure: + type: string + description: "What latency metric to measure. Default: ttfb." + enum: + - ttfb + - total + strategy: + type: string + description: "Retry strategy when latency threshold is exceeded." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts when latency threshold is exceeded." + minimum: 1 + block_duration_seconds: + type: integer + description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300." + minimum: 1 + scope: + type: string + description: "What to block: model-level or provider-level. Default: model." + enum: + - model + - provider + apply_to: + type: string + description: "Blocking scope: global or request-scoped. Default: global." + enum: + - global + - request + min_triggers: + type: integer + description: "Number of High_Latency_Events required before creating a block. Default: 1." + minimum: 1 + trigger_window_seconds: + type: integer + description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1." + minimum: 1 + additionalProperties: false + required: + - threshold_ms + - strategy + - max_attempts + - block_duration_seconds + backoff: + type: object + description: "Exponential backoff configuration. When omitted, no backoff delays are applied." + properties: + apply_to: + type: string + description: "REQUIRED. Determines when backoff delays are applied." + enum: + - same_model + - same_provider + - global + base_ms: + type: integer + description: "Base delay in milliseconds for exponential backoff. Default: 100." + minimum: 1 + max_ms: + type: integer + description: "Maximum delay in milliseconds for exponential backoff. Default: 5000." + minimum: 1 + jitter: + type: boolean + description: "Add random jitter to prevent thundering herd. Default: true." + additionalProperties: false + required: + - apply_to + retry_after_handling: + type: object + description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)." + properties: + scope: + type: string + description: "What to block: model-level or provider-level. Default: model." + enum: + - model + - provider + apply_to: + type: string + description: "Blocking scope: request-scoped or global. Default: global." + enum: + - request + - global + max_retry_after_seconds: + type: integer + description: "Maximum Retry-After value honored in seconds. Default: 300." + minimum: 1 + additionalProperties: false + max_retry_duration_ms: + type: integer + description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry." + minimum: 0 + additionalProperties: false additionalProperties: false required: - model @@ -271,6 +448,183 @@ properties: required: - name - description + retry_policy: + type: object + description: "Retry policy configuration. When not specified, no retry logic is enabled." + properties: + fallback_models: + type: array + description: "Ordered list of model identifiers to fallback to before using Provider_List." + items: + type: string + default_strategy: + type: string + description: "Default retry strategy for unconfigured status codes. Default: different_provider." + enum: + - same_model + - same_provider + - different_provider + default_max_attempts: + type: integer + description: "Default max retry attempts for unconfigured status codes. Default: 2." + minimum: 0 + on_status_codes: + type: array + description: "Per-status-code retry configuration." + items: + type: object + properties: + codes: + type: array + description: "List of status codes as integers or range strings (e.g. '502-504')." + items: + anyOf: + - type: integer + minimum: 100 + maximum: 599 + - type: string + description: "Range string in 'start-end' format (e.g. '502-504')." + strategy: + type: string + description: "Retry strategy for these status codes." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts for these status codes." + minimum: 0 + additionalProperties: false + required: + - codes + - strategy + - max_attempts + on_timeout: + type: object + description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts." + properties: + strategy: + type: string + description: "Retry strategy for timeout errors." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts for timeout errors." + minimum: 1 + additionalProperties: false + required: + - strategy + - max_attempts + on_high_latency: + type: object + description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed." + properties: + threshold_ms: + type: integer + description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered." + minimum: 1 + measure: + type: string + description: "What latency metric to measure. Default: ttfb." + enum: + - ttfb + - total + strategy: + type: string + description: "Retry strategy when latency threshold is exceeded." + enum: + - same_model + - same_provider + - different_provider + max_attempts: + type: integer + description: "Max retry attempts when latency threshold is exceeded." + minimum: 1 + block_duration_seconds: + type: integer + description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300." + minimum: 1 + scope: + type: string + description: "What to block: model-level or provider-level. Default: model." + enum: + - model + - provider + apply_to: + type: string + description: "Blocking scope: global or request-scoped. Default: global." + enum: + - global + - request + min_triggers: + type: integer + description: "Number of High_Latency_Events required before creating a block. Default: 1." + minimum: 1 + trigger_window_seconds: + type: integer + description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1." + minimum: 1 + additionalProperties: false + required: + - threshold_ms + - strategy + - max_attempts + - block_duration_seconds + backoff: + type: object + description: "Exponential backoff configuration. When omitted, no backoff delays are applied." + properties: + apply_to: + type: string + description: "REQUIRED. Determines when backoff delays are applied." + enum: + - same_model + - same_provider + - global + base_ms: + type: integer + description: "Base delay in milliseconds for exponential backoff. Default: 100." + minimum: 1 + max_ms: + type: integer + description: "Maximum delay in milliseconds for exponential backoff. Default: 5000." + minimum: 1 + jitter: + type: boolean + description: "Add random jitter to prevent thundering herd. Default: true." + additionalProperties: false + required: + - apply_to + retry_after_handling: + type: object + description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)." + properties: + scope: + type: string + description: "What to block: model-level or provider-level. Default: model." + enum: + - model + - provider + apply_to: + type: string + description: "Blocking scope: request-scoped or global. Default: global." + enum: + - request + - global + max_retry_after_seconds: + type: integer + description: "Maximum Retry-After value honored in seconds. Default: 300." + minimum: 1 + additionalProperties: false + max_retry_duration_ms: + type: integer + description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry." + minimum: 0 + additionalProperties: false additionalProperties: false required: - model diff --git a/crates/Cargo.lock b/crates/Cargo.lock index c5819de9..27511878 100644 --- a/crates/Cargo.lock +++ b/crates/Cargo.lock @@ -293,7 +293,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -302,6 +311,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" @@ -528,6 +543,7 @@ dependencies = [ "hyper 1.9.0", "log", "pretty_assertions", + "proptest", "proxy-wasm", "rand 0.8.5", "serde", @@ -928,7 +944,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" dependencies = [ - "bit-set", + "bit-set 0.5.3", "regex", ] @@ -2527,6 +2543,25 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "proptest" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bit-set 0.8.0", + "bit-vec 0.8.0", + "bitflags", + "num-traits", + "rand 0.9.4", + "rand_chacha 0.9.0", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + [[package]] name = "prost" version = "0.14.3" @@ -2575,6 +2610,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quinn" version = "0.11.9" @@ -2727,6 +2768,15 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core 0.9.5", +] + [[package]] name = "raw-cpuid" version = "11.6.0" @@ -3056,6 +3106,18 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" version = "1.0.23" @@ -3984,6 +4046,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicase" version = "2.9.0" @@ -4133,6 +4201,15 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "want" version = "0.3.1" diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index dd2cba15..0a5e5651 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -36,3 +36,4 @@ tokio = { version = "1.44", features = ["sync", "time", "macros", "rt"] } hyper = { version = "1.0", features = ["full"] } bytes = "1.0" http-body-util = "0.1" +proptest = "1.4" diff --git a/crates/common/proptest-regressions/configuration.txt b/crates/common/proptest-regressions/configuration.txt new file mode 100644 index 00000000..1382b74e --- /dev/null +++ b/crates/common/proptest-regressions/configuration.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc e6443c9611ecf84b57514e7d12084d62e6558989f663f1106d3cedd746a20bf3 # shrinks to include_on_status_codes = false, include_backoff = true, include_retry_after = false, include_on_timeout = false, include_on_high_latency = false diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 37492904..2fed0290 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference { } } +// ── Retry Policy Configuration Types ────────────────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum RetryStrategy { + SameModel, + SameProvider, + DifferentProvider, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BlockScope { + Model, + Provider, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ApplyTo { + Global, + Request, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BackoffApplyTo { + SameModel, + SameProvider, + Global, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum LatencyMeasure { + Ttfb, + Total, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum StatusCodeEntry { + Single(u16), + Range(String), +} + +impl StatusCodeEntry { + /// Expand a StatusCodeEntry into a list of individual status codes. + /// For Single, returns a vec with one element. + /// For Range (e.g. "502-504"), returns [502, 503, 504]. + pub fn expand(&self) -> Result, String> { + match self { + StatusCodeEntry::Single(code) => Ok(vec![*code]), + StatusCodeEntry::Range(range_str) => { + let parts: Vec<&str> = range_str.split('-').collect(); + if parts.len() != 2 { + return Err(format!( + "Invalid status code range format: '{}'. Expected 'start-end'.", + range_str + )); + } + let start: u16 = parts[0] + .trim() + .parse() + .map_err(|_| format!("Invalid start in status code range: '{}'", parts[0]))?; + let end: u16 = parts[1] + .trim() + .parse() + .map_err(|_| format!("Invalid end in status code range: '{}'", parts[1]))?; + if start > end { + return Err(format!( + "Status code range start ({}) must be <= end ({})", + start, end + )); + } + Ok((start..=end).collect()) + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct StatusCodeConfig { + pub codes: Vec, + pub strategy: RetryStrategy, + pub max_attempts: u32, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TimeoutRetryConfig { + pub strategy: RetryStrategy, + pub max_attempts: u32, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct BackoffConfig { + pub apply_to: BackoffApplyTo, + #[serde(default = "default_base_ms")] + pub base_ms: u64, + #[serde(default = "default_max_ms")] + pub max_ms: u64, + #[serde(default = "default_jitter")] + pub jitter: bool, +} + +fn default_base_ms() -> u64 { + 100 +} +fn default_max_ms() -> u64 { + 5000 +} +fn default_jitter() -> bool { + true +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RetryAfterHandlingConfig { + #[serde(default = "default_retry_after_scope")] + pub scope: BlockScope, + #[serde(default = "default_retry_after_apply_to")] + pub apply_to: ApplyTo, + #[serde(default = "default_max_retry_after_seconds")] + pub max_retry_after_seconds: u64, +} + +fn default_retry_after_scope() -> BlockScope { + BlockScope::Model +} +fn default_retry_after_apply_to() -> ApplyTo { + ApplyTo::Global +} +fn default_max_retry_after_seconds() -> u64 { + 300 +} + +impl Default for RetryAfterHandlingConfig { + fn default() -> Self { + Self { + scope: BlockScope::Model, + apply_to: ApplyTo::Global, + max_retry_after_seconds: 300, + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct HighLatencyConfig { + pub threshold_ms: u64, + #[serde(default = "default_latency_measure")] + pub measure: LatencyMeasure, + #[serde(default = "default_min_triggers")] + pub min_triggers: u32, + pub trigger_window_seconds: Option, + pub strategy: RetryStrategy, + pub max_attempts: u32, + #[serde(default = "default_block_duration")] + pub block_duration_seconds: u64, + #[serde(default = "default_block_scope")] + pub scope: BlockScope, + #[serde(default = "default_high_latency_apply_to")] + pub apply_to: ApplyTo, +} + +fn default_latency_measure() -> LatencyMeasure { + LatencyMeasure::Ttfb +} +fn default_min_triggers() -> u32 { + 1 +} +fn default_block_duration() -> u64 { + 300 +} +fn default_block_scope() -> BlockScope { + BlockScope::Model +} +fn default_high_latency_apply_to() -> ApplyTo { + ApplyTo::Global +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RetryPolicy { + #[serde(default)] + pub fallback_models: Vec, + #[serde(default = "default_retry_strategy")] + pub default_strategy: RetryStrategy, + #[serde(default = "default_max_attempts")] + pub default_max_attempts: u32, + #[serde(default)] + pub on_status_codes: Vec, + pub on_timeout: Option, + pub on_high_latency: Option, + pub backoff: Option, + pub retry_after_handling: Option, + pub max_retry_duration_ms: Option, +} + +fn default_retry_strategy() -> RetryStrategy { + RetryStrategy::DifferentProvider +} +fn default_max_attempts() -> u32 { + 2 +} + +impl RetryPolicy { + /// Get the effective Retry-After handling config. + /// Always returns a config when retry_policy exists (Retry-After is always-on). + pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig { + self.retry_after_handling.clone().unwrap_or_default() + } +} + +/// Extract provider prefix from a model identifier. +/// e.g., "openai/gpt-4o" -> "openai" +pub fn extract_provider(model_id: &str) -> &str { + model_id.split('/').next().unwrap_or(model_id) +} + +// ── End Retry Policy Configuration Types ───────────────────────────────────── + #[derive(Debug, Clone, Serialize, Deserialize)] //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { @@ -492,6 +711,8 @@ pub struct LlmProvider { pub internal: Option, pub passthrough_auth: Option, pub headers: Option>, + /// Retry policy configuration. When None, retry logic is disabled. + pub retry_policy: Option, } pub trait IntoModels { @@ -536,6 +757,7 @@ impl Default for LlmProvider { internal: None, passthrough_auth: None, headers: None, + retry_policy: None, } } } @@ -654,6 +876,307 @@ mod test { use super::{IntoModels, LlmProvider, LlmProviderType}; use crate::api::open_ai::ToolType; + use proptest::prelude::*; + + // ── Proptest Strategies for Retry Config Types ───────────────────────── + + fn arb_retry_strategy() -> impl Strategy { + prop_oneof![ + Just(super::RetryStrategy::SameModel), + Just(super::RetryStrategy::SameProvider), + Just(super::RetryStrategy::DifferentProvider), + ] + } + + fn arb_block_scope() -> impl Strategy { + prop_oneof![ + Just(super::BlockScope::Model), + Just(super::BlockScope::Provider), + ] + } + + fn arb_apply_to() -> impl Strategy { + prop_oneof![Just(super::ApplyTo::Global), Just(super::ApplyTo::Request),] + } + + fn arb_backoff_apply_to() -> impl Strategy { + prop_oneof![ + Just(super::BackoffApplyTo::SameModel), + Just(super::BackoffApplyTo::SameProvider), + Just(super::BackoffApplyTo::Global), + ] + } + + fn arb_latency_measure() -> impl Strategy { + prop_oneof![ + Just(super::LatencyMeasure::Ttfb), + Just(super::LatencyMeasure::Total), + ] + } + + fn arb_status_code_entry() -> impl Strategy { + prop_oneof![ + (100u16..=599u16).prop_map(super::StatusCodeEntry::Single), + (100u16..=599u16) + .prop_flat_map(|start| (Just(start), start..=599u16)) + .prop_map(|(start, end)| super::StatusCodeEntry::Range(format!( + "{}-{}", + start, end + ))), + ] + } + + fn arb_status_code_config() -> impl Strategy { + ( + prop::collection::vec(arb_status_code_entry(), 1..=3), + arb_retry_strategy(), + 1u32..=10u32, + ) + .prop_map(|(codes, strategy, max_attempts)| super::StatusCodeConfig { + codes, + strategy, + max_attempts, + }) + } + + fn arb_timeout_retry_config() -> impl Strategy { + (arb_retry_strategy(), 1u32..=10u32).prop_map(|(strategy, max_attempts)| { + super::TimeoutRetryConfig { + strategy, + max_attempts, + } + }) + } + + fn arb_backoff_config() -> impl Strategy { + (arb_backoff_apply_to(), 1u64..=1000u64, prop::bool::ANY) + .prop_flat_map(|(apply_to, base_ms, jitter)| { + let max_ms_min = base_ms + 1; + ( + Just(apply_to), + Just(base_ms), + max_ms_min..=(base_ms + 50000), + Just(jitter), + ) + }) + .prop_map(|(apply_to, base_ms, max_ms, jitter)| super::BackoffConfig { + apply_to, + base_ms, + max_ms, + jitter, + }) + } + + fn arb_retry_after_handling_config() -> impl Strategy { + (arb_block_scope(), arb_apply_to(), 1u64..=3600u64).prop_map( + |(scope, apply_to, max_retry_after_seconds)| super::RetryAfterHandlingConfig { + scope, + apply_to, + max_retry_after_seconds, + }, + ) + } + + fn arb_high_latency_config() -> impl Strategy { + ( + 1u64..=60000u64, + arb_latency_measure(), + 1u32..=10u32, + arb_retry_strategy(), + 1u32..=10u32, + 1u64..=3600u64, + arb_block_scope(), + arb_apply_to(), + ) + .prop_map( + |( + threshold_ms, + measure, + min_triggers, + strategy, + max_attempts, + block_duration_seconds, + scope, + apply_to, + )| { + let trigger_window_seconds = if min_triggers > 1 { Some(60u64) } else { None }; + super::HighLatencyConfig { + threshold_ms, + measure, + min_triggers, + trigger_window_seconds, + strategy, + max_attempts, + block_duration_seconds, + scope, + apply_to, + } + }, + ) + } + + fn arb_retry_policy() -> impl Strategy { + ( + prop::collection::vec("[a-z]{2,6}/[a-z0-9-]{3,10}", 0..=3), + arb_retry_strategy(), + 1u32..=10u32, + prop::collection::vec(arb_status_code_config(), 0..=3), + prop::option::of(arb_timeout_retry_config()), + prop::option::of(arb_high_latency_config()), + prop::option::of(arb_backoff_config()), + prop::option::of(arb_retry_after_handling_config()), + prop::option::of(1u64..=120000u64), + ) + .prop_map( + |( + fallback_models, + default_strategy, + default_max_attempts, + on_status_codes, + on_timeout, + on_high_latency, + backoff, + retry_after_handling, + max_retry_duration_ms, + )| { + super::RetryPolicy { + fallback_models, + default_strategy, + default_max_attempts, + on_status_codes, + on_timeout, + on_high_latency, + backoff, + retry_after_handling, + max_retry_duration_ms, + } + }, + ) + } + + // ── Property Tests ───────────────────────────────────────────────────── + + // Feature: retry-on-ratelimit, Property 1: Configuration Round-Trip Parsing + // **Validates: Requirements 1.2** + proptest! { + #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))] + + /// Property 1: Configuration Round-Trip Parsing + /// Generate arbitrary valid RetryPolicy structs, serialize to YAML, + /// re-parse, and assert equivalence. + #[test] + fn prop_retry_policy_round_trip(policy in arb_retry_policy()) { + let yaml = serde_yaml::to_string(&policy) + .expect("serialization should succeed"); + let parsed: super::RetryPolicy = serde_yaml::from_str(&yaml) + .expect("deserialization should succeed"); + + // Direct structural equality — all types derive PartialEq + prop_assert_eq!(&policy, &parsed); + } + + } + + // Feature: retry-on-ratelimit, Property 2: Configuration Defaults Applied Correctly + // **Validates: Requirements 1.2** + proptest! { + #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))] + + /// Property 2: Configuration Defaults Applied Correctly + /// Generate RetryPolicy YAML with optional fields omitted, parse, + /// and assert correct defaults are applied. + #[test] + fn prop_retry_policy_defaults( + include_on_status_codes in prop::bool::ANY, + include_backoff in prop::bool::ANY, + include_retry_after in prop::bool::ANY, + include_on_timeout in prop::bool::ANY, + include_on_high_latency in prop::bool::ANY, + ) { + // Build a minimal YAML — RetryPolicy has serde defaults for all fields, + // so even an empty mapping is valid. + let mut parts: Vec = Vec::new(); + + // When we include sections, only provide required sub-fields so + // we can verify the optional sub-fields get their defaults. + if include_on_status_codes { + parts.push("on_status_codes:\n - codes: [429]\n strategy: same_model\n max_attempts: 2".to_string()); + } + if include_backoff { + parts.push("backoff:\n apply_to: global".to_string()); + } + if include_retry_after { + parts.push("retry_after_handling:\n scope: provider".to_string()); + } + if include_on_timeout { + parts.push("on_timeout:\n strategy: same_model\n max_attempts: 1".to_string()); + } + if include_on_high_latency { + parts.push("on_high_latency:\n threshold_ms: 5000\n strategy: different_provider\n max_attempts: 2".to_string()); + } + + let yaml = if parts.is_empty() { + "{}".to_string() + } else { + parts.join("\n") + }; + + let parsed: super::RetryPolicy = serde_yaml::from_str(&yaml) + .expect("deserialization should succeed"); + + // Assert top-level defaults + prop_assert_eq!(parsed.default_strategy, super::RetryStrategy::DifferentProvider); + prop_assert_eq!(parsed.default_max_attempts, 2); + prop_assert!(parsed.fallback_models.is_empty()); + prop_assert_eq!(parsed.max_retry_duration_ms, None); + + // Assert on_status_codes defaults to empty vec + if !include_on_status_codes { + prop_assert!(parsed.on_status_codes.is_empty()); + } + + // Assert backoff defaults when present + if include_backoff { + let backoff = parsed.backoff.as_ref().unwrap(); + prop_assert_eq!(backoff.base_ms, 100); + prop_assert_eq!(backoff.max_ms, 5000); + prop_assert_eq!(backoff.jitter, true); + } else { + prop_assert!(parsed.backoff.is_none()); + } + + // Assert retry_after_handling defaults when present + if include_retry_after { + let rah = parsed.retry_after_handling.as_ref().unwrap(); + prop_assert_eq!(rah.scope, super::BlockScope::Provider); // explicitly set + prop_assert_eq!(rah.apply_to, super::ApplyTo::Global); // default + prop_assert_eq!(rah.max_retry_after_seconds, 300); // default + } else { + prop_assert!(parsed.retry_after_handling.is_none()); + } + + // Assert effective_retry_after_config always returns valid defaults + let effective = parsed.effective_retry_after_config(); + if include_retry_after { + prop_assert_eq!(effective.scope, super::BlockScope::Provider); + } else { + prop_assert_eq!(effective.scope, super::BlockScope::Model); + } + prop_assert_eq!(effective.apply_to, super::ApplyTo::Global); + prop_assert_eq!(effective.max_retry_after_seconds, 300); + + // Assert high latency defaults when present + if include_on_high_latency { + let hl = parsed.on_high_latency.as_ref().unwrap(); + prop_assert_eq!(hl.measure, super::LatencyMeasure::Ttfb); // default + prop_assert_eq!(hl.min_triggers, 1); // default + prop_assert_eq!(hl.block_duration_seconds, 300); // default + prop_assert_eq!(hl.scope, super::BlockScope::Model); // default + prop_assert_eq!(hl.apply_to, super::ApplyTo::Global); // default + } + } + } + #[test] fn test_deserialize_configuration() { let ref_config = fs::read_to_string( @@ -735,6 +1258,60 @@ mod test { } } + // Feature: retry-on-ratelimit, Property 4: Status Code Range Expansion + // **Validates: Requirements 1.8** + proptest! { + #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))] + + /// Property 4: Status Code Range Expansion — degenerate range (start == end) + /// A range "N-N" should expand to a single-element vec containing N. + #[test] + fn prop_status_code_range_expansion( + code in 100u16..=599u16, + ) { + let range_str = format!("{}-{}", code, code); + let entry = super::StatusCodeEntry::Range(range_str); + let expanded = entry.expand().expect("expand should succeed for valid range"); + prop_assert_eq!(expanded.len(), 1); + prop_assert_eq!(expanded[0], code); + } + + /// Property 4: Status Code Range Expansion — Single variant + /// Generate arbitrary code (100..=599), expand, assert vec of length 1 containing that code. + #[test] + fn prop_status_code_single_expansion(code in 100u16..=599u16) { + let entry = super::StatusCodeEntry::Single(code); + let expanded = entry.expand().expect("expand should succeed for Single"); + prop_assert_eq!(expanded.len(), 1); + prop_assert_eq!(expanded[0], code); + } + } + + proptest! { + #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))] + + /// Property 4: Status Code Range Expansion — arbitrary start..=end range + /// Generate arbitrary valid range strings "start-end" (100 ≤ start ≤ end ≤ 599), + /// expand, and assert correct count and bounds. + #[test] + fn prop_status_code_range_expansion_full( + (start, end) in (100u16..=599u16).prop_flat_map(|s| (Just(s), s..=599u16)) + ) { + let range_str = format!("{}-{}", start, end); + let entry = super::StatusCodeEntry::Range(range_str); + let expanded = entry.expand().expect("expand should succeed for valid range"); + + let expected_len = (end - start + 1) as usize; + prop_assert_eq!(expanded.len(), expected_len, "length should be end - start + 1"); + prop_assert_eq!(*expanded.first().unwrap(), start, "first element should be start"); + prop_assert_eq!(*expanded.last().unwrap(), end, "last element should be end"); + + for &code in &expanded { + prop_assert!(code >= start && code <= end, "all codes should be in [start, end]"); + } + } + } + #[test] fn test_into_models_filters_internal_providers() { let providers = vec![ @@ -762,7 +1339,6 @@ mod test { assert!(model_ids.contains(&"openai-gpt4".to_string())); assert!(!model_ids.contains(&"plano-orchestrator".to_string())); } - #[test] fn test_llm_provider_type_vercel_and_openrouter_roundtrip() { // Regression: brightstaff used to reject `provider_interface: vercel` @@ -807,4 +1383,445 @@ disable_signals: false let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap(); assert_eq!(overrides.disable_signals, None); } + + // ── P0 Edge Case Tests: YAML Config Pattern Parsing ──────────────────── + + /// Helper to parse a RetryPolicy from a YAML string. + fn parse_retry_policy(yaml: &str) -> super::RetryPolicy { + serde_yaml::from_str(yaml).expect("YAML should parse into RetryPolicy") + } + + #[test] + fn test_pattern1_multi_provider_failover_for_rate_limits() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + "#; + let policy = parse_retry_policy(yaml); + assert_eq!(policy.fallback_models, vec!["anthropic/claude-3-5-sonnet"]); + assert_eq!(policy.on_status_codes.len(), 1); + assert_eq!( + policy.on_status_codes[0].strategy, + super::RetryStrategy::DifferentProvider + ); + assert_eq!(policy.on_status_codes[0].max_attempts, 2); + } + + #[test] + fn test_pattern2_same_provider_failover_with_model_downgrade() { + let yaml = r#" + fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "same_provider" + max_attempts: 2 + "#; + let policy = parse_retry_policy(yaml); + assert_eq!(policy.fallback_models.len(), 2); + assert_eq!( + policy.on_status_codes[0].strategy, + super::RetryStrategy::SameProvider + ); + } + + #[test] + fn test_pattern3_single_model_with_backoff_on_multiple_error_types() { + let yaml = r#" + fallback_models: [] + on_status_codes: + - codes: [429] + strategy: "same_model" + max_attempts: 3 + - codes: [503] + strategy: "same_model" + max_attempts: 3 + backoff: + apply_to: "same_model" + base_ms: 500 + "#; + let policy = parse_retry_policy(yaml); + assert!(policy.fallback_models.is_empty()); + assert_eq!(policy.on_status_codes.len(), 2); + let backoff = policy.backoff.unwrap(); + assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel); + assert_eq!(backoff.base_ms, 500); + // max_ms defaults to 5000 + assert_eq!(backoff.max_ms, 5000); + } + + #[test] + fn test_pattern4_per_status_code_strategy_customization() { + let yaml = r#" + fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet] + default_strategy: "different_provider" + default_max_attempts: 2 + on_status_codes: + - codes: [429] + strategy: "same_provider" + max_attempts: 2 + - codes: [502] + strategy: "different_provider" + max_attempts: 3 + - codes: [503] + strategy: "same_model" + max_attempts: 2 + - codes: [504] + strategy: "different_provider" + max_attempts: 2 + on_timeout: + strategy: "different_provider" + max_attempts: 2 + "#; + let policy = parse_retry_policy(yaml); + assert_eq!( + policy.default_strategy, + super::RetryStrategy::DifferentProvider + ); + assert_eq!(policy.default_max_attempts, 2); + assert_eq!(policy.on_status_codes.len(), 4); + assert_eq!( + policy.on_status_codes[2].strategy, + super::RetryStrategy::SameModel + ); + let timeout = policy.on_timeout.unwrap(); + assert_eq!(timeout.strategy, super::RetryStrategy::DifferentProvider); + assert_eq!(timeout.max_attempts, 2); + } + + #[test] + fn test_pattern5_timeout_specific_configuration() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + default_strategy: "different_provider" + default_max_attempts: 2 + on_status_codes: + - codes: [429] + strategy: "same_provider" + max_attempts: 2 + on_timeout: + strategy: "different_provider" + max_attempts: 3 + "#; + let policy = parse_retry_policy(yaml); + let timeout = policy.on_timeout.unwrap(); + assert_eq!(timeout.max_attempts, 3); + } + + #[test] + fn test_pattern6_no_retry_parses_as_empty() { + // Pattern 6: No retry_policy section. We test that an empty YAML + // object parses with all defaults. + let yaml = "{}"; + let policy = parse_retry_policy(yaml); + assert!(policy.fallback_models.is_empty()); + assert_eq!( + policy.default_strategy, + super::RetryStrategy::DifferentProvider + ); + assert_eq!(policy.default_max_attempts, 2); + assert!(policy.on_status_codes.is_empty()); + assert!(policy.on_timeout.is_none()); + assert!(policy.backoff.is_none()); + assert!(policy.max_retry_duration_ms.is_none()); + } + + #[test] + fn test_pattern7_backoff_only_for_same_model() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "same_model" + max_attempts: 2 + backoff: + apply_to: "same_model" + base_ms: 100 + max_ms: 5000 + jitter: true + "#; + let policy = parse_retry_policy(yaml); + let backoff = policy.backoff.unwrap(); + assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel); + assert!(backoff.jitter); + } + + #[test] + fn test_pattern8_backoff_for_same_provider() { + let yaml = r#" + fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "same_provider" + max_attempts: 2 + backoff: + apply_to: "same_provider" + base_ms: 200 + max_ms: 10000 + jitter: true + "#; + let policy = parse_retry_policy(yaml); + let backoff = policy.backoff.unwrap(); + assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameProvider); + assert_eq!(backoff.base_ms, 200); + assert_eq!(backoff.max_ms, 10000); + } + + #[test] + fn test_pattern9_global_backoff() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + backoff: + apply_to: "global" + base_ms: 50 + max_ms: 2000 + jitter: true + "#; + let policy = parse_retry_policy(yaml); + let backoff = policy.backoff.unwrap(); + assert_eq!(backoff.apply_to, super::BackoffApplyTo::Global); + assert_eq!(backoff.base_ms, 50); + assert_eq!(backoff.max_ms, 2000); + } + + #[test] + fn test_pattern10_deterministic_backoff_without_jitter() { + let yaml = r#" + fallback_models: [] + on_status_codes: + - codes: [429] + strategy: "same_model" + max_attempts: 3 + backoff: + apply_to: "same_model" + base_ms: 1000 + max_ms: 30000 + jitter: false + "#; + let policy = parse_retry_policy(yaml); + let backoff = policy.backoff.unwrap(); + assert!(!backoff.jitter); + assert_eq!(backoff.base_ms, 1000); + assert_eq!(backoff.max_ms, 30000); + } + + #[test] + fn test_pattern11_no_backoff_fast_failover() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + "#; + let policy = parse_retry_policy(yaml); + assert!(policy.backoff.is_none()); + } + + #[test] + fn test_pattern17_mixed_integer_and_range_codes() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + default_strategy: "different_provider" + default_max_attempts: 2 + on_status_codes: + - codes: [429, "430-450", 526] + strategy: "same_provider" + max_attempts: 2 + - codes: ["502-504"] + strategy: "different_provider" + max_attempts: 3 + "#; + let policy = parse_retry_policy(yaml); + assert_eq!(policy.on_status_codes.len(), 2); + + // Verify first entry: 429 + range 430-450 + 526 + let first = &policy.on_status_codes[0]; + assert_eq!(first.codes.len(), 3); + let expanded: Vec = first + .codes + .iter() + .flat_map(|c| c.expand().unwrap()) + .collect(); + // 429 + (430..=450 = 21 codes) + 526 = 23 codes + assert_eq!(expanded.len(), 23); + assert!(expanded.contains(&429)); + assert!(expanded.contains(&430)); + assert!(expanded.contains(&450)); + assert!(expanded.contains(&526)); + assert!(!expanded.contains(&451)); + + // Verify second entry: range 502-504 + let second = &policy.on_status_codes[1]; + let expanded2: Vec = second + .codes + .iter() + .flat_map(|c| c.expand().unwrap()) + .collect(); + assert_eq!(expanded2, vec![502, 503, 504]); + } + + #[test] + fn test_pattern12_model_level_retry_after_blocking() { + let yaml = r#" + fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + - codes: [503] + strategy: "different_provider" + max_attempts: 2 + retry_after_handling: + scope: "model" + apply_to: "global" + "#; + let policy = parse_retry_policy(yaml); + assert_eq!(policy.fallback_models.len(), 2); + assert_eq!(policy.on_status_codes.len(), 2); + let rah = policy.retry_after_handling.unwrap(); + assert_eq!(rah.scope, super::BlockScope::Model); + assert_eq!(rah.apply_to, super::ApplyTo::Global); + // max_retry_after_seconds defaults to 300 + assert_eq!(rah.max_retry_after_seconds, 300); + } + + #[test] + fn test_pattern13_provider_level_retry_after_blocking() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + - codes: [503] + strategy: "different_provider" + max_attempts: 2 + - codes: [502] + strategy: "different_provider" + max_attempts: 2 + retry_after_handling: + scope: "provider" + apply_to: "global" + "#; + let policy = parse_retry_policy(yaml); + assert_eq!(policy.on_status_codes.len(), 3); + let rah = policy.retry_after_handling.unwrap(); + assert_eq!(rah.scope, super::BlockScope::Provider); + assert_eq!(rah.apply_to, super::ApplyTo::Global); + assert_eq!(rah.max_retry_after_seconds, 300); + } + + #[test] + fn test_pattern14_request_level_retry_after() { + let yaml = r#" + fallback_models: [anthropic/claude-3-5-sonnet] + on_status_codes: + - codes: [429] + strategy: "different_provider" + max_attempts: 2 + - codes: [503] + strategy: "different_provider" + max_attempts: 2 + retry_after_handling: + scope: "model" + apply_to: "request" + "#; + let policy = parse_retry_policy(yaml); + let rah = policy.retry_after_handling.unwrap(); + assert_eq!(rah.scope, super::BlockScope::Model); + assert_eq!(rah.apply_to, super::ApplyTo::Request); + assert_eq!(rah.max_retry_after_seconds, 300); + } + + #[test] + fn test_pattern15_no_custom_retry_after_config_defaults_plus_backoff() { + let yaml = r#" + fallback_models: [] + on_status_codes: + - codes: [429] + strategy: "same_model" + max_attempts: 3 + - codes: [503] + strategy: "same_model" + max_attempts: 3 + backoff: + apply_to: "same_model" + base_ms: 1000 + max_ms: 30000 + jitter: true + "#; + let policy = parse_retry_policy(yaml); + // No retry_after_handling section → None + assert!(policy.retry_after_handling.is_none()); + // But effective config should return defaults + let effective = policy.effective_retry_after_config(); + assert_eq!(effective.scope, super::BlockScope::Model); + assert_eq!(effective.apply_to, super::ApplyTo::Global); + assert_eq!(effective.max_retry_after_seconds, 300); + // Backoff is present + let backoff = policy.backoff.unwrap(); + assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel); + assert_eq!(backoff.base_ms, 1000); + assert_eq!(backoff.max_ms, 30000); + assert!(backoff.jitter); + } + + #[test] + fn test_pattern16_fallback_models_list_for_targeted_failover() { + let yaml = r#" + fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet, anthropic/claude-3-opus] + default_strategy: "different_provider" + default_max_attempts: 2 + on_status_codes: + - codes: [429] + strategy: "same_provider" + max_attempts: 2 + "#; + let policy = parse_retry_policy(yaml); + assert_eq!( + policy.fallback_models, + vec![ + "openai/gpt-4o-mini", + "anthropic/claude-3-5-sonnet", + "anthropic/claude-3-opus", + ] + ); + assert_eq!( + policy.default_strategy, + super::RetryStrategy::DifferentProvider + ); + assert_eq!(policy.default_max_attempts, 2); + assert_eq!(policy.on_status_codes.len(), 1); + assert_eq!( + policy.on_status_codes[0].strategy, + super::RetryStrategy::SameProvider + ); + } + + #[test] + fn test_backoff_without_apply_to_fails_deserialization() { + // backoff.apply_to is a required field (no serde default), so YAML + // without it should fail to deserialize. + let yaml = r#" + on_status_codes: + - codes: [429] + strategy: "same_model" + max_attempts: 2 + backoff: + base_ms: 100 + max_ms: 5000 + "#; + let result: Result = serde_yaml::from_str(yaml); + assert!( + result.is_err(), + "backoff without apply_to should fail deserialization" + ); + } } diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index b4355a2f..e369f669 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -278,6 +278,7 @@ mod tests { stream: None, passthrough_auth: None, headers: None, + retry_policy: None, } }