diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 37492904..aaf2c71f 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference { } } +// ── Retry Policy Configuration Types ────────────────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum RetryStrategy { + SameModel, + SameProvider, + DifferentProvider, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BlockScope { + Model, + Provider, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ApplyTo { + Global, + Request, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BackoffApplyTo { + SameModel, + SameProvider, + Global, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum LatencyMeasure { + Ttfb, + Total, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum StatusCodeEntry { + Single(u16), + Range(String), +} + +impl StatusCodeEntry { + /// Expand a StatusCodeEntry into a list of individual status codes. + /// For Single, returns a vec with one element. + /// For Range (e.g. "502-504"), returns [502, 503, 504]. + pub fn expand(&self) -> Result, String> { + match self { + StatusCodeEntry::Single(code) => Ok(vec![*code]), + StatusCodeEntry::Range(range_str) => { + let parts: Vec<&str> = range_str.split('-').collect(); + if parts.len() != 2 { + return Err(format!( + "Invalid status code range format: '{}'. Expected 'start-end'.", + range_str + )); + } + let start: u16 = parts[0] + .trim() + .parse() + .map_err(|_| format!("Invalid start in status code range: '{}'", parts[0]))?; + let end: u16 = parts[1] + .trim() + .parse() + .map_err(|_| format!("Invalid end in status code range: '{}'", parts[1]))?; + if start > end { + return Err(format!( + "Status code range start ({}) must be <= end ({})", + start, end + )); + } + Ok((start..=end).collect()) + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct StatusCodeConfig { + pub codes: Vec, + pub strategy: RetryStrategy, + pub max_attempts: u32, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TimeoutRetryConfig { + pub strategy: RetryStrategy, + pub max_attempts: u32, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct BackoffConfig { + pub apply_to: BackoffApplyTo, + #[serde(default = "default_base_ms")] + pub base_ms: u64, + #[serde(default = "default_max_ms")] + pub max_ms: u64, + #[serde(default = "default_jitter")] + pub jitter: bool, +} + +fn default_base_ms() -> u64 { + 100 +} +fn default_max_ms() -> u64 { + 5000 +} +fn default_jitter() -> bool { + true +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RetryAfterHandlingConfig { + #[serde(default = "default_retry_after_scope")] + pub scope: BlockScope, + #[serde(default = "default_retry_after_apply_to")] + pub apply_to: ApplyTo, + #[serde(default = "default_max_retry_after_seconds")] + pub max_retry_after_seconds: u64, +} + +fn default_retry_after_scope() -> BlockScope { + BlockScope::Model +} +fn default_retry_after_apply_to() -> ApplyTo { + ApplyTo::Global +} +fn default_max_retry_after_seconds() -> u64 { + 300 +} + +impl Default for RetryAfterHandlingConfig { + fn default() -> Self { + Self { + scope: BlockScope::Model, + apply_to: ApplyTo::Global, + max_retry_after_seconds: 300, + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct HighLatencyConfig { + pub threshold_ms: u64, + #[serde(default = "default_latency_measure")] + pub measure: LatencyMeasure, + #[serde(default = "default_min_triggers")] + pub min_triggers: u32, + pub trigger_window_seconds: Option, + pub strategy: RetryStrategy, + pub max_attempts: u32, + #[serde(default = "default_block_duration")] + pub block_duration_seconds: u64, + #[serde(default = "default_block_scope")] + pub scope: BlockScope, + #[serde(default = "default_high_latency_apply_to")] + pub apply_to: ApplyTo, +} + +fn default_latency_measure() -> LatencyMeasure { + LatencyMeasure::Ttfb +} +fn default_min_triggers() -> u32 { + 1 +} +fn default_block_duration() -> u64 { + 300 +} +fn default_block_scope() -> BlockScope { + BlockScope::Model +} +fn default_high_latency_apply_to() -> ApplyTo { + ApplyTo::Global +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RetryPolicy { + #[serde(default)] + pub fallback_models: Vec, + #[serde(default = "default_retry_strategy")] + pub default_strategy: RetryStrategy, + #[serde(default = "default_max_attempts")] + pub default_max_attempts: u32, + #[serde(default)] + pub on_status_codes: Vec, + pub on_timeout: Option, + pub on_high_latency: Option, + pub backoff: Option, + pub retry_after_handling: Option, + pub max_retry_duration_ms: Option, +} + +fn default_retry_strategy() -> RetryStrategy { + RetryStrategy::DifferentProvider +} +fn default_max_attempts() -> u32 { + 2 +} + +impl RetryPolicy { + /// Get the effective Retry-After handling config. + /// Always returns a config when retry_policy exists (Retry-After is always-on). + pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig { + self.retry_after_handling.clone().unwrap_or_default() + } +} + +/// Extract provider prefix from a model identifier. +/// e.g., "openai/gpt-4o" -> "openai" +pub fn extract_provider(model_id: &str) -> &str { + model_id.split('/').next().unwrap_or(model_id) +} + +// ── End Retry Policy Configuration Types ───────────────────────────────────── + #[derive(Debug, Clone, Serialize, Deserialize)] //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { @@ -492,6 +711,8 @@ pub struct LlmProvider { pub internal: Option, pub passthrough_auth: Option, pub headers: Option>, + /// Retry policy configuration. When None, retry logic is disabled. + pub retry_policy: Option, } pub trait IntoModels { @@ -536,6 +757,7 @@ impl Default for LlmProvider { internal: None, passthrough_auth: None, headers: None, + retry_policy: None, } } } diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index b4355a2f..e369f669 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -278,6 +278,7 @@ mod tests { stream: None, passthrough_auth: None, headers: None, + retry_policy: None, } }