common: add RetryPolicy configuration types

Add retry policy configuration types to support automatic retry and failover for LLM requests:

- RetryPolicy: top-level config with fallback_models, default_strategy, default_max_attempts, and per-status-code overrides
- BackoffConfig: exponential backoff with base_ms, max_ms, jitter, and scope (per-model, per-provider, or global)
- RetryAfterHandlingConfig: Retry-After header handling with block scope and duration limits
- HighLatencyConfig: latency-based blocking with threshold, measurement type, and trigger conditions
- Latency trigger debouncing: min_triggers and trigger_window_seconds fields on HighLatencyConfig
- RetryStrategy enum: same_model, same_provider, different_provider
- StatusCodeEntry: flexible status code matching (a single code or a "start-end" range; entries are combined in a list)

Also add an optional retry_policy field to LlmProvider, set to None in its Default impl.

Signed-off-by: Troy Mitchell <i@troy-y.org>
2026-06-08 14:55:14 +02:00 · 2026-04-28 15:22:47 +08:00 · 2026-04-28 15:22:47 +08:00 · a58a283e20
commit a58a283e20
parent 2548aa71cb
2 changed files with 223 additions and 0 deletions
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference {
    }
 }

+// ── Retry Policy Configuration Types ──────────────────────────────────────────
+
+/// Retry strategy used by `RetryPolicy::default_strategy` and by the `strategy`
+/// field of `StatusCodeConfig`, `TimeoutRetryConfig` and `HighLatencyConfig`.
+///
+/// Serialized in snake_case (`same_model`, `same_provider`, `different_provider`).
+/// NOTE(review): the variant meanings below are inferred from their names; the
+/// code that consumes them is not part of this change — confirm.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum RetryStrategy {
+    /// Presumably: retry against the same model.
+    SameModel,
+    /// Presumably: retry against another model of the same provider.
+    SameProvider,
+    /// Presumably: retry against a model from a different provider.
+    DifferentProvider,
+}
+
+/// Value of the `scope` field of `RetryAfterHandlingConfig` and `HighLatencyConfig`.
+///
+/// Serialized in snake_case (`model`, `provider`).
+/// NOTE(review): presumably selects whether a block covers one model or a whole
+/// provider; the blocking logic itself is not visible in this change — confirm.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BlockScope {
+    Model,
+    Provider,
+}
+
+/// Value of the `apply_to` field of `RetryAfterHandlingConfig` and `HighLatencyConfig`.
+///
+/// Serialized in snake_case (`global`, `request`).
+/// NOTE(review): presumably selects whether the effect is shared across all
+/// requests or confined to the current request — confirm against the consumer.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyTo {
+    Global,
+    Request,
+}
+
+/// Scope of a `BackoffConfig`: per-model, per-provider, or global.
+///
+/// Serialized in snake_case (`same_model`, `same_provider`, `global`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BackoffApplyTo {
+    SameModel,
+    SameProvider,
+    Global,
+}
+
+/// Latency measurement type selected by `HighLatencyConfig::measure`.
+///
+/// Serialized in snake_case (`ttfb`, `total`).
+/// NOTE(review): `Ttfb` presumably means time to first byte and `Total` the
+/// full response time — confirm against the code that takes the measurement.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum LatencyMeasure {
+    Ttfb,
+    Total,
+}
+
+/// One entry of a status-code list: a single code or a textual range.
+///
+/// Because the enum is `#[serde(untagged)]`, no variant tag appears in the
+/// serialized form: a bare integer maps to `Single` and a string maps to `Range`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum StatusCodeEntry {
+    /// A single status code, e.g. `429`.
+    Single(u16),
+    /// An inclusive range written as `"start-end"`, e.g. `"502-504"`.
+    /// The text is only validated when `expand` is called.
+    Range(String),
+}
+
+impl StatusCodeEntry {
+    /// Lists every status code this entry stands for.
+    ///
+    /// * `Single(code)` gives a one-element vector.
+    /// * `Range("start-end")` gives every code from `start` to `end`, both
+    ///   included; `"502-504"` becomes `[502, 503, 504]`. Whitespace around
+    ///   either bound is ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the range text does not split into exactly two
+    /// parts on `-`, when either part is not a valid `u16`, or when `start` is
+    /// greater than `end`.
+    pub fn expand(&self) -> Result<Vec<u16>, String> {
+        let range_str = match self {
+            StatusCodeEntry::Single(code) => return Ok(vec![*code]),
+            StatusCodeEntry::Range(text) => text,
+        };
+
+        // Exactly two '-'-separated pieces are accepted; anything else is malformed.
+        let mut pieces = range_str.split('-');
+        let (start_text, end_text) = match (pieces.next(), pieces.next(), pieces.next()) {
+            (Some(first), Some(second), None) => (first, second),
+            _ => {
+                return Err(format!(
+                    "Invalid status code range format: '{}'. Expected 'start-end'.",
+                    range_str
+                ))
+            }
+        };
+
+        // The start bound is parsed (and reported) before the end bound.
+        let start = match start_text.trim().parse::<u16>() {
+            Ok(value) => value,
+            Err(_) => {
+                return Err(format!(
+                    "Invalid start in status code range: '{}'",
+                    start_text
+                ))
+            }
+        };
+        let end = match end_text.trim().parse::<u16>() {
+            Ok(value) => value,
+            Err(_) => {
+                return Err(format!(
+                    "Invalid end in status code range: '{}'",
+                    end_text
+                ))
+            }
+        };
+
+        if start <= end {
+            Ok((start..=end).collect())
+        } else {
+            Err(format!(
+                "Status code range start ({}) must be <= end ({})",
+                start, end
+            ))
+        }
+    }
+}
+
+/// Per-status-code override, listed in `RetryPolicy::on_status_codes`.
+///
+/// None of the fields has a serde default, so all three must be present when
+/// deserializing.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StatusCodeConfig {
+    /// Status codes this override covers; each entry is a single code or a range.
+    pub codes: Vec<StatusCodeEntry>,
+    /// Retry strategy for the listed codes.
+    pub strategy: RetryStrategy,
+    /// Attempt limit for the listed codes.
+    /// NOTE(review): whether the first request counts as an attempt is not
+    /// visible here — confirm.
+    pub max_attempts: u32,
+}
+
+/// Retry settings held by `RetryPolicy::on_timeout`.
+///
+/// Neither field has a serde default, so both must be present when deserializing.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TimeoutRetryConfig {
+    /// Retry strategy for this case.
+    pub strategy: RetryStrategy,
+    /// Attempt limit for this case.
+    pub max_attempts: u32,
+}
+
+/// Exponential backoff settings held by `RetryPolicy::backoff`.
+///
+/// `apply_to` is required; the remaining fields fall back to the defaults
+/// named on each field when omitted.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct BackoffConfig {
+    /// Scope of the backoff (per-model, per-provider, or global). Required.
+    pub apply_to: BackoffApplyTo,
+    /// Base delay; defaults to 100. Presumably milliseconds, per the `_ms` suffix.
+    #[serde(default = "default_base_ms")]
+    pub base_ms: u64,
+    /// Maximum delay; defaults to 5000. Presumably milliseconds, per the `_ms` suffix.
+    #[serde(default = "default_max_ms")]
+    pub max_ms: u64,
+    /// Whether jitter is enabled; defaults to `true`.
+    #[serde(default = "default_jitter")]
+    pub jitter: bool,
+}
+
+/// Serde default for `BackoffConfig::base_ms`.
+fn default_base_ms() -> u64 {
+    100
+}
+/// Serde default for `BackoffConfig::max_ms`.
+fn default_max_ms() -> u64 {
+    5000
+}
+/// Serde default for `BackoffConfig::jitter`.
+fn default_jitter() -> bool {
+    true
+}
+
+/// Retry-After header handling settings held by `RetryPolicy::retry_after_handling`.
+///
+/// Every field has a serde default, so an empty section deserializes successfully.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RetryAfterHandlingConfig {
+    /// Block scope; defaults to `BlockScope::Model`.
+    #[serde(default = "default_retry_after_scope")]
+    pub scope: BlockScope,
+    /// Defaults to `ApplyTo::Global`.
+    #[serde(default = "default_retry_after_apply_to")]
+    pub apply_to: ApplyTo,
+    /// Duration limit in seconds; defaults to 300.
+    /// NOTE(review): presumably an upper bound on the Retry-After value that
+    /// is honored — confirm against the consumer.
+    #[serde(default = "default_max_retry_after_seconds")]
+    pub max_retry_after_seconds: u64,
+}
+
+/// Serde default for `RetryAfterHandlingConfig::scope`.
+fn default_retry_after_scope() -> BlockScope {
+    BlockScope::Model
+}
+/// Serde default for `RetryAfterHandlingConfig::apply_to`.
+fn default_retry_after_apply_to() -> ApplyTo {
+    ApplyTo::Global
+}
+/// Serde default for `RetryAfterHandlingConfig::max_retry_after_seconds`.
+fn default_max_retry_after_seconds() -> u64 {
+    300
+}
+
+impl Default for RetryAfterHandlingConfig {
+    /// Builds the configuration used when `RetryPolicy::retry_after_handling`
+    /// is `None`.
+    ///
+    /// Each field comes from the same helper that serde calls for an omitted
+    /// field, so `Default::default()` and an empty deserialized section cannot
+    /// drift apart.
+    fn default() -> Self {
+        Self {
+            scope: default_retry_after_scope(),
+            apply_to: default_retry_after_apply_to(),
+            max_retry_after_seconds: default_max_retry_after_seconds(),
+        }
+    }
+}
+
+/// Latency-based blocking settings held by `RetryPolicy::on_high_latency`.
+///
+/// `threshold_ms`, `strategy` and `max_attempts` have no serde default and
+/// must be present; the other fields fall back to the defaults named below.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct HighLatencyConfig {
+    /// Latency threshold. Presumably milliseconds, per the `_ms` suffix.
+    pub threshold_ms: u64,
+    /// Measurement type; defaults to `LatencyMeasure::Ttfb`.
+    #[serde(default = "default_latency_measure")]
+    pub measure: LatencyMeasure,
+    /// Trigger count used for debouncing; defaults to 1.
+    #[serde(default = "default_min_triggers")]
+    pub min_triggers: u32,
+    /// Optional trigger window, in seconds, used for debouncing.
+    pub trigger_window_seconds: Option<u64>,
+    /// Retry strategy for this case.
+    pub strategy: RetryStrategy,
+    /// Attempt limit for this case.
+    pub max_attempts: u32,
+    /// Block duration in seconds; defaults to 300.
+    #[serde(default = "default_block_duration")]
+    pub block_duration_seconds: u64,
+    /// Block scope; defaults to `BlockScope::Model`.
+    #[serde(default = "default_block_scope")]
+    pub scope: BlockScope,
+    /// Defaults to `ApplyTo::Global`.
+    #[serde(default = "default_high_latency_apply_to")]
+    pub apply_to: ApplyTo,
+}
+
+/// Serde default for `HighLatencyConfig::measure`.
+fn default_latency_measure() -> LatencyMeasure {
+    LatencyMeasure::Ttfb
+}
+/// Serde default for `HighLatencyConfig::min_triggers`.
+fn default_min_triggers() -> u32 {
+    1
+}
+/// Serde default for `HighLatencyConfig::block_duration_seconds`.
+fn default_block_duration() -> u64 {
+    300
+}
+/// Serde default for `HighLatencyConfig::scope`.
+fn default_block_scope() -> BlockScope {
+    BlockScope::Model
+}
+/// Serde default for `HighLatencyConfig::apply_to`.
+fn default_high_latency_apply_to() -> ApplyTo {
+    ApplyTo::Global
+}
+
+/// Top-level retry and failover configuration for LLM requests.
+///
+/// Holds the fallback models, the default strategy and attempt limit, the
+/// per-status-code overrides, and the optional per-condition sections.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RetryPolicy {
+    /// Fallback model identifiers; empty when omitted.
+    #[serde(default)]
+    pub fallback_models: Vec<String>,
+    /// Default retry strategy; `RetryStrategy::DifferentProvider` when omitted.
+    #[serde(default = "default_retry_strategy")]
+    pub default_strategy: RetryStrategy,
+    /// Default attempt limit; 2 when omitted.
+    #[serde(default = "default_max_attempts")]
+    pub default_max_attempts: u32,
+    /// Per-status-code overrides; empty when omitted.
+    #[serde(default)]
+    pub on_status_codes: Vec<StatusCodeConfig>,
+    /// Optional timeout retry settings.
+    pub on_timeout: Option<TimeoutRetryConfig>,
+    /// Optional latency-based blocking settings.
+    pub on_high_latency: Option<HighLatencyConfig>,
+    /// Optional exponential backoff settings.
+    pub backoff: Option<BackoffConfig>,
+    /// Optional Retry-After handling settings; when `None`,
+    /// `effective_retry_after_config` supplies the defaults.
+    pub retry_after_handling: Option<RetryAfterHandlingConfig>,
+    /// Optional duration limit. Presumably milliseconds, per the `_ms` suffix.
+    /// NOTE(review): presumably caps the total time spent retrying — confirm.
+    pub max_retry_duration_ms: Option<u64>,
+}
+
+/// Serde default for `RetryPolicy::default_strategy`.
+fn default_retry_strategy() -> RetryStrategy {
+    RetryStrategy::DifferentProvider
+}
+/// Serde default for `RetryPolicy::default_max_attempts`.
+fn default_max_attempts() -> u32 {
+    2
+}
+
+impl RetryPolicy {
+    /// Returns the Retry-After handling settings in effect for this policy.
+    ///
+    /// Yields a copy of `retry_after_handling` when it is set and
+    /// `RetryAfterHandlingConfig::default()` otherwise, so a config is always
+    /// returned when a retry policy exists (Retry-After is always-on).
+    pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig {
+        match &self.retry_after_handling {
+            Some(configured) => configured.clone(),
+            None => RetryAfterHandlingConfig::default(),
+        }
+    }
+}
+
+/// Returns the provider portion of a model identifier: the text before the first `/`.
+///
+/// For example `"openai/gpt-4o"` yields `"openai"`. An identifier with no `/`
+/// is returned unchanged, and one that starts with `/` yields an empty string.
+pub fn extract_provider(model_id: &str) -> &str {
+    match model_id.find('/') {
+        // `find` returns the byte offset of the slash, which is a char boundary.
+        Some(slash_at) => &model_id[..slash_at],
+        None => model_id,
+    }
+}
+
+// ── End Retry Policy Configuration Types ─────────────────────────────────────
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {
@ -492,6 +711,8 @@ pub struct LlmProvider {
    pub internal: Option<bool>,
    pub passthrough_auth: Option<bool>,
    pub headers: Option<HashMap<String, String>>,
+    /// Retry policy configuration. When None, retry logic is disabled.
+    pub retry_policy: Option<RetryPolicy>,
 }

 pub trait IntoModels {
@ -536,6 +757,7 @@ impl Default for LlmProvider {
            internal: None,
            passthrough_auth: None,
            headers: None,
+            retry_policy: None,
        }
    }
 }
--- a/crates/common/src/llm_providers.rs
+++ b/crates/common/src/llm_providers.rs
@ -278,6 +278,7 @@ mod tests {
            stream: None,
            passthrough_auth: None,
            headers: None,
+            retry_policy: None,
        }
    }