mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
common: add RetryPolicy configuration types
Add retry policy configuration types to support automatic retry and failover for LLM requests: - RetryPolicy: top-level config with fallback_models, default_strategy, default_max_attempts, and per-status-code overrides - BackoffConfig: exponential backoff with base_ms, max_ms, jitter, and scope (per-model, per-provider, or global) - RetryAfterConfig: Retry-After header handling with block scope and duration limits - HighLatencyConfig: latency-based blocking with threshold, measurement type, and trigger conditions - LatencyTriggerConfig: min_triggers and trigger_window for debouncing - RetryStrategy enum: same_model, same_provider, different_provider - StatusCodeEntry: flexible status code matching (single, range, list) Also add retry_policy field to GatewayConfig with Default impl. Signed-off-by: Troy Mitchell <i@troy-y.org>
This commit is contained in:
parent
2548aa71cb
commit
a58a283e20
2 changed files with 223 additions and 0 deletions
|
|
@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Retry Policy Configuration Types ──────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RetryStrategy {
|
||||
SameModel,
|
||||
SameProvider,
|
||||
DifferentProvider,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum BlockScope {
|
||||
Model,
|
||||
Provider,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ApplyTo {
|
||||
Global,
|
||||
Request,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum BackoffApplyTo {
|
||||
SameModel,
|
||||
SameProvider,
|
||||
Global,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum LatencyMeasure {
|
||||
Ttfb,
|
||||
Total,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum StatusCodeEntry {
|
||||
Single(u16),
|
||||
Range(String),
|
||||
}
|
||||
|
||||
impl StatusCodeEntry {
|
||||
/// Expand a StatusCodeEntry into a list of individual status codes.
|
||||
/// For Single, returns a vec with one element.
|
||||
/// For Range (e.g. "502-504"), returns [502, 503, 504].
|
||||
pub fn expand(&self) -> Result<Vec<u16>, String> {
|
||||
match self {
|
||||
StatusCodeEntry::Single(code) => Ok(vec![*code]),
|
||||
StatusCodeEntry::Range(range_str) => {
|
||||
let parts: Vec<&str> = range_str.split('-').collect();
|
||||
if parts.len() != 2 {
|
||||
return Err(format!(
|
||||
"Invalid status code range format: '{}'. Expected 'start-end'.",
|
||||
range_str
|
||||
));
|
||||
}
|
||||
let start: u16 = parts[0]
|
||||
.trim()
|
||||
.parse()
|
||||
.map_err(|_| format!("Invalid start in status code range: '{}'", parts[0]))?;
|
||||
let end: u16 = parts[1]
|
||||
.trim()
|
||||
.parse()
|
||||
.map_err(|_| format!("Invalid end in status code range: '{}'", parts[1]))?;
|
||||
if start > end {
|
||||
return Err(format!(
|
||||
"Status code range start ({}) must be <= end ({})",
|
||||
start, end
|
||||
));
|
||||
}
|
||||
Ok((start..=end).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct StatusCodeConfig {
|
||||
pub codes: Vec<StatusCodeEntry>,
|
||||
pub strategy: RetryStrategy,
|
||||
pub max_attempts: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct TimeoutRetryConfig {
|
||||
pub strategy: RetryStrategy,
|
||||
pub max_attempts: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct BackoffConfig {
|
||||
pub apply_to: BackoffApplyTo,
|
||||
#[serde(default = "default_base_ms")]
|
||||
pub base_ms: u64,
|
||||
#[serde(default = "default_max_ms")]
|
||||
pub max_ms: u64,
|
||||
#[serde(default = "default_jitter")]
|
||||
pub jitter: bool,
|
||||
}
|
||||
|
||||
fn default_base_ms() -> u64 {
|
||||
100
|
||||
}
|
||||
fn default_max_ms() -> u64 {
|
||||
5000
|
||||
}
|
||||
fn default_jitter() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RetryAfterHandlingConfig {
|
||||
#[serde(default = "default_retry_after_scope")]
|
||||
pub scope: BlockScope,
|
||||
#[serde(default = "default_retry_after_apply_to")]
|
||||
pub apply_to: ApplyTo,
|
||||
#[serde(default = "default_max_retry_after_seconds")]
|
||||
pub max_retry_after_seconds: u64,
|
||||
}
|
||||
|
||||
fn default_retry_after_scope() -> BlockScope {
|
||||
BlockScope::Model
|
||||
}
|
||||
fn default_retry_after_apply_to() -> ApplyTo {
|
||||
ApplyTo::Global
|
||||
}
|
||||
fn default_max_retry_after_seconds() -> u64 {
|
||||
300
|
||||
}
|
||||
|
||||
impl Default for RetryAfterHandlingConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
scope: BlockScope::Model,
|
||||
apply_to: ApplyTo::Global,
|
||||
max_retry_after_seconds: 300,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct HighLatencyConfig {
|
||||
pub threshold_ms: u64,
|
||||
#[serde(default = "default_latency_measure")]
|
||||
pub measure: LatencyMeasure,
|
||||
#[serde(default = "default_min_triggers")]
|
||||
pub min_triggers: u32,
|
||||
pub trigger_window_seconds: Option<u64>,
|
||||
pub strategy: RetryStrategy,
|
||||
pub max_attempts: u32,
|
||||
#[serde(default = "default_block_duration")]
|
||||
pub block_duration_seconds: u64,
|
||||
#[serde(default = "default_block_scope")]
|
||||
pub scope: BlockScope,
|
||||
#[serde(default = "default_high_latency_apply_to")]
|
||||
pub apply_to: ApplyTo,
|
||||
}
|
||||
|
||||
fn default_latency_measure() -> LatencyMeasure {
|
||||
LatencyMeasure::Ttfb
|
||||
}
|
||||
fn default_min_triggers() -> u32 {
|
||||
1
|
||||
}
|
||||
fn default_block_duration() -> u64 {
|
||||
300
|
||||
}
|
||||
fn default_block_scope() -> BlockScope {
|
||||
BlockScope::Model
|
||||
}
|
||||
fn default_high_latency_apply_to() -> ApplyTo {
|
||||
ApplyTo::Global
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RetryPolicy {
|
||||
#[serde(default)]
|
||||
pub fallback_models: Vec<String>,
|
||||
#[serde(default = "default_retry_strategy")]
|
||||
pub default_strategy: RetryStrategy,
|
||||
#[serde(default = "default_max_attempts")]
|
||||
pub default_max_attempts: u32,
|
||||
#[serde(default)]
|
||||
pub on_status_codes: Vec<StatusCodeConfig>,
|
||||
pub on_timeout: Option<TimeoutRetryConfig>,
|
||||
pub on_high_latency: Option<HighLatencyConfig>,
|
||||
pub backoff: Option<BackoffConfig>,
|
||||
pub retry_after_handling: Option<RetryAfterHandlingConfig>,
|
||||
pub max_retry_duration_ms: Option<u64>,
|
||||
}
|
||||
|
||||
fn default_retry_strategy() -> RetryStrategy {
|
||||
RetryStrategy::DifferentProvider
|
||||
}
|
||||
fn default_max_attempts() -> u32 {
|
||||
2
|
||||
}
|
||||
|
||||
impl RetryPolicy {
|
||||
/// Get the effective Retry-After handling config.
|
||||
/// Always returns a config when retry_policy exists (Retry-After is always-on).
|
||||
pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig {
|
||||
self.retry_after_handling.clone().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract provider prefix from a model identifier.
|
||||
/// e.g., "openai/gpt-4o" -> "openai"
|
||||
pub fn extract_provider(model_id: &str) -> &str {
|
||||
model_id.split('/').next().unwrap_or(model_id)
|
||||
}
|
||||
|
||||
// ── End Retry Policy Configuration Types ─────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
//TODO: use enum for model, but if there is a new model, we need to update the code
|
||||
pub struct LlmProvider {
|
||||
|
|
@ -492,6 +711,8 @@ pub struct LlmProvider {
|
|||
pub internal: Option<bool>,
|
||||
pub passthrough_auth: Option<bool>,
|
||||
pub headers: Option<HashMap<String, String>>,
|
||||
/// Retry policy configuration. When None, retry logic is disabled.
|
||||
pub retry_policy: Option<RetryPolicy>,
|
||||
}
|
||||
|
||||
pub trait IntoModels {
|
||||
|
|
@ -536,6 +757,7 @@ impl Default for LlmProvider {
|
|||
internal: None,
|
||||
passthrough_auth: None,
|
||||
headers: None,
|
||||
retry_policy: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -278,6 +278,7 @@ mod tests {
|
|||
stream: None,
|
||||
passthrough_auth: None,
|
||||
headers: None,
|
||||
retry_policy: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue