common: add RetryPolicy configuration types

Add retry policy configuration types to support automatic retry and
failover for LLM requests:

- RetryPolicy: top-level config with fallback_models, default_strategy,
  default_max_attempts, and per-status-code overrides
- BackoffConfig: exponential backoff with base_ms, max_ms, jitter, and
  scope (per-model, per-provider, or global)
- RetryAfterConfig: Retry-After header handling with block scope and
  duration limits
- HighLatencyConfig: latency-based blocking with threshold, measurement
  type, and trigger conditions
- LatencyTriggerConfig: min_triggers and trigger_window for debouncing
- RetryStrategy enum: same_model, same_provider, different_provider
- StatusCodeEntry: flexible status code matching (single, range, list)

Also add retry_policy field to GatewayConfig with Default impl.

Signed-off-by: Troy Mitchell <i@troy-y.org>
This commit is contained in:
Troy Mitchell 2026-04-28 15:22:47 +08:00
parent 2548aa71cb
commit a58a283e20
2 changed files with 223 additions and 0 deletions

View file

@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference {
}
}
// ── Retry Policy Configuration Types ──────────────────────────────────────────
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RetryStrategy {
SameModel,
SameProvider,
DifferentProvider,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BlockScope {
Model,
Provider,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ApplyTo {
Global,
Request,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BackoffApplyTo {
SameModel,
SameProvider,
Global,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LatencyMeasure {
Ttfb,
Total,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StatusCodeEntry {
Single(u16),
Range(String),
}
impl StatusCodeEntry {
/// Expand a StatusCodeEntry into a list of individual status codes.
/// For Single, returns a vec with one element.
/// For Range (e.g. "502-504"), returns [502, 503, 504].
pub fn expand(&self) -> Result<Vec<u16>, String> {
match self {
StatusCodeEntry::Single(code) => Ok(vec![*code]),
StatusCodeEntry::Range(range_str) => {
let parts: Vec<&str> = range_str.split('-').collect();
if parts.len() != 2 {
return Err(format!(
"Invalid status code range format: '{}'. Expected 'start-end'.",
range_str
));
}
let start: u16 = parts[0]
.trim()
.parse()
.map_err(|_| format!("Invalid start in status code range: '{}'", parts[0]))?;
let end: u16 = parts[1]
.trim()
.parse()
.map_err(|_| format!("Invalid end in status code range: '{}'", parts[1]))?;
if start > end {
return Err(format!(
"Status code range start ({}) must be <= end ({})",
start, end
));
}
Ok((start..=end).collect())
}
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct StatusCodeConfig {
pub codes: Vec<StatusCodeEntry>,
pub strategy: RetryStrategy,
pub max_attempts: u32,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TimeoutRetryConfig {
pub strategy: RetryStrategy,
pub max_attempts: u32,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BackoffConfig {
pub apply_to: BackoffApplyTo,
#[serde(default = "default_base_ms")]
pub base_ms: u64,
#[serde(default = "default_max_ms")]
pub max_ms: u64,
#[serde(default = "default_jitter")]
pub jitter: bool,
}
fn default_base_ms() -> u64 {
100
}
fn default_max_ms() -> u64 {
5000
}
fn default_jitter() -> bool {
true
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RetryAfterHandlingConfig {
#[serde(default = "default_retry_after_scope")]
pub scope: BlockScope,
#[serde(default = "default_retry_after_apply_to")]
pub apply_to: ApplyTo,
#[serde(default = "default_max_retry_after_seconds")]
pub max_retry_after_seconds: u64,
}
fn default_retry_after_scope() -> BlockScope {
BlockScope::Model
}
fn default_retry_after_apply_to() -> ApplyTo {
ApplyTo::Global
}
fn default_max_retry_after_seconds() -> u64 {
300
}
impl Default for RetryAfterHandlingConfig {
fn default() -> Self {
Self {
scope: BlockScope::Model,
apply_to: ApplyTo::Global,
max_retry_after_seconds: 300,
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HighLatencyConfig {
pub threshold_ms: u64,
#[serde(default = "default_latency_measure")]
pub measure: LatencyMeasure,
#[serde(default = "default_min_triggers")]
pub min_triggers: u32,
pub trigger_window_seconds: Option<u64>,
pub strategy: RetryStrategy,
pub max_attempts: u32,
#[serde(default = "default_block_duration")]
pub block_duration_seconds: u64,
#[serde(default = "default_block_scope")]
pub scope: BlockScope,
#[serde(default = "default_high_latency_apply_to")]
pub apply_to: ApplyTo,
}
fn default_latency_measure() -> LatencyMeasure {
LatencyMeasure::Ttfb
}
fn default_min_triggers() -> u32 {
1
}
fn default_block_duration() -> u64 {
300
}
fn default_block_scope() -> BlockScope {
BlockScope::Model
}
fn default_high_latency_apply_to() -> ApplyTo {
ApplyTo::Global
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RetryPolicy {
#[serde(default)]
pub fallback_models: Vec<String>,
#[serde(default = "default_retry_strategy")]
pub default_strategy: RetryStrategy,
#[serde(default = "default_max_attempts")]
pub default_max_attempts: u32,
#[serde(default)]
pub on_status_codes: Vec<StatusCodeConfig>,
pub on_timeout: Option<TimeoutRetryConfig>,
pub on_high_latency: Option<HighLatencyConfig>,
pub backoff: Option<BackoffConfig>,
pub retry_after_handling: Option<RetryAfterHandlingConfig>,
pub max_retry_duration_ms: Option<u64>,
}
fn default_retry_strategy() -> RetryStrategy {
RetryStrategy::DifferentProvider
}
fn default_max_attempts() -> u32 {
2
}
impl RetryPolicy {
/// Get the effective Retry-After handling config.
/// Always returns a config when retry_policy exists (Retry-After is always-on).
pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig {
self.retry_after_handling.clone().unwrap_or_default()
}
}
/// Extract provider prefix from a model identifier.
/// e.g., "openai/gpt-4o" -> "openai"
pub fn extract_provider(model_id: &str) -> &str {
model_id.split('/').next().unwrap_or(model_id)
}
// ── End Retry Policy Configuration Types ─────────────────────────────────────
#[derive(Debug, Clone, Serialize, Deserialize)]
//TODO: use enum for model, but if there is a new model, we need to update the code
pub struct LlmProvider {
@ -492,6 +711,8 @@ pub struct LlmProvider {
pub internal: Option<bool>,
pub passthrough_auth: Option<bool>,
pub headers: Option<HashMap<String, String>>,
/// Retry policy configuration. When None, retry logic is disabled.
pub retry_policy: Option<RetryPolicy>,
}
pub trait IntoModels {
@ -536,6 +757,7 @@ impl Default for LlmProvider {
internal: None,
passthrough_auth: None,
headers: None,
retry_policy: None,
}
}
}

View file

@ -278,6 +278,7 @@ mod tests {
stream: None,
passthrough_auth: None,
headers: None,
retry_policy: None,
}
}