diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
index 9560b437..7de77db2 100644
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@@ -213,6 +213,183 @@ properties:
           required:
             - name
             - description
+        retry_policy:
+          type: object
+          description: "Retry policy configuration. When not specified, no retry logic is enabled."
+          properties:
+            fallback_models:
+              type: array
+              description: "Ordered list of model identifiers to fallback to before using Provider_List."
+              items:
+                type: string
+            default_strategy:
+              type: string
+              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
+              enum:
+                - same_model
+                - same_provider
+                - different_provider
+            default_max_attempts:
+              type: integer
+              description: "Default max retry attempts for unconfigured status codes. Default: 2."
+              minimum: 0
+            on_status_codes:
+              type: array
+              description: "Per-status-code retry configuration."
+              items:
+                type: object
+                properties:
+                  codes:
+                    type: array
+                    description: "List of status codes as integers or range strings (e.g. '502-504')."
+                    items:
+                      anyOf:
+                        - type: integer
+                          minimum: 100
+                          maximum: 599
+                        - type: string
+                          description: "Range string in 'start-end' format (e.g. '502-504')."
+                  strategy:
+                    type: string
+                    description: "Retry strategy for these status codes."
+                    enum:
+                      - same_model
+                      - same_provider
+                      - different_provider
+                  max_attempts:
+                    type: integer
+                    description: "Max retry attempts for these status codes."
+                    minimum: 0
+                additionalProperties: false
+                required:
+                  - codes
+                  - strategy
+                  - max_attempts
+            on_timeout:
+              type: object
+              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
+              properties:
+                strategy:
+                  type: string
+                  description: "Retry strategy for timeout errors."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - different_provider
+                max_attempts:
+                  type: integer
+                  description: "Max retry attempts for timeout errors."
+                  minimum: 1
+              additionalProperties: false
+              required:
+                - strategy
+                - max_attempts
+            on_high_latency:
+              type: object
+              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
+              properties:
+                threshold_ms:
+                  type: integer
+                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
+                  minimum: 1
+                measure:
+                  type: string
+                  description: "What latency metric to measure. Default: ttfb."
+                  enum:
+                    - ttfb
+                    - total
+                strategy:
+                  type: string
+                  description: "Retry strategy when latency threshold is exceeded."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - different_provider
+                max_attempts:
+                  type: integer
+                  description: "Max retry attempts when latency threshold is exceeded."
+                  minimum: 1
+                block_duration_seconds:
+                  type: integer
+                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
+                  minimum: 1
+                scope:
+                  type: string
+                  description: "What to block: model-level or provider-level. Default: model."
+                  enum:
+                    - model
+                    - provider
+                apply_to:
+                  type: string
+                  description: "Blocking scope: global or request-scoped. Default: global."
+                  enum:
+                    - global
+                    - request
+                min_triggers:
+                  type: integer
+                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
+                  minimum: 1
+                trigger_window_seconds:
+                  type: integer
+                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
+                  minimum: 1
+              additionalProperties: false
+              required:
+                - threshold_ms
+                - strategy
+                - max_attempts
+                - block_duration_seconds
+            backoff:
+              type: object
+              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
+              properties:
+                apply_to:
+                  type: string
+                  description: "REQUIRED. Determines when backoff delays are applied."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - global
+                base_ms:
+                  type: integer
+                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
+                  minimum: 1
+                max_ms:
+                  type: integer
+                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
+                  minimum: 1
+                jitter:
+                  type: boolean
+                  description: "Add random jitter to prevent thundering herd. Default: true."
+              additionalProperties: false
+              required:
+                - apply_to
+            retry_after_handling:
+              type: object
+              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
+              properties:
+                scope:
+                  type: string
+                  description: "What to block: model-level or provider-level. Default: model."
+                  enum:
+                    - model
+                    - provider
+                apply_to:
+                  type: string
+                  description: "Blocking scope: request-scoped or global. Default: global."
+                  enum:
+                    - request
+                    - global
+                max_retry_after_seconds:
+                  type: integer
+                  description: "Maximum Retry-After value honored in seconds. Default: 300."
+                  minimum: 1
+              additionalProperties: false
+            max_retry_duration_ms:
+              type: integer
+              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
+              minimum: 0
+          additionalProperties: false
       additionalProperties: false
       required:
         - model
@@ -271,6 +448,183 @@ properties:
           required:
             - name
             - description
+        retry_policy:
+          type: object
+          description: "Retry policy configuration. When not specified, no retry logic is enabled."
+          properties:
+            fallback_models:
+              type: array
+              description: "Ordered list of model identifiers to fallback to before using Provider_List."
+              items:
+                type: string
+            default_strategy:
+              type: string
+              description: "Default retry strategy for unconfigured status codes. Default: different_provider."
+              enum:
+                - same_model
+                - same_provider
+                - different_provider
+            default_max_attempts:
+              type: integer
+              description: "Default max retry attempts for unconfigured status codes. Default: 2."
+              minimum: 0
+            on_status_codes:
+              type: array
+              description: "Per-status-code retry configuration."
+              items:
+                type: object
+                properties:
+                  codes:
+                    type: array
+                    description: "List of status codes as integers or range strings (e.g. '502-504')."
+                    items:
+                      anyOf:
+                        - type: integer
+                          minimum: 100
+                          maximum: 599
+                        - type: string
+                          description: "Range string in 'start-end' format (e.g. '502-504')."
+                  strategy:
+                    type: string
+                    description: "Retry strategy for these status codes."
+                    enum:
+                      - same_model
+                      - same_provider
+                      - different_provider
+                  max_attempts:
+                    type: integer
+                    description: "Max retry attempts for these status codes."
+                    minimum: 0
+                additionalProperties: false
+                required:
+                  - codes
+                  - strategy
+                  - max_attempts
+            on_timeout:
+              type: object
+              description: "Timeout-specific retry configuration. When omitted, timeouts use default_strategy and default_max_attempts."
+              properties:
+                strategy:
+                  type: string
+                  description: "Retry strategy for timeout errors."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - different_provider
+                max_attempts:
+                  type: integer
+                  description: "Max retry attempts for timeout errors."
+                  minimum: 1
+              additionalProperties: false
+              required:
+                - strategy
+                - max_attempts
+            on_high_latency:
+              type: object
+              description: "High latency proactive failover configuration. When omitted, no latency-based failover is performed."
+              properties:
+                threshold_ms:
+                  type: integer
+                  description: "Latency threshold in milliseconds. When response time exceeds this value, a High_Latency_Event is triggered."
+                  minimum: 1
+                measure:
+                  type: string
+                  description: "What latency metric to measure. Default: ttfb."
+                  enum:
+                    - ttfb
+                    - total
+                strategy:
+                  type: string
+                  description: "Retry strategy when latency threshold is exceeded."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - different_provider
+                max_attempts:
+                  type: integer
+                  description: "Max retry attempts when latency threshold is exceeded."
+                  minimum: 1
+                block_duration_seconds:
+                  type: integer
+                  description: "How long to block the model/provider after detecting high latency, in seconds. Default: 300."
+                  minimum: 1
+                scope:
+                  type: string
+                  description: "What to block: model-level or provider-level. Default: model."
+                  enum:
+                    - model
+                    - provider
+                apply_to:
+                  type: string
+                  description: "Blocking scope: global or request-scoped. Default: global."
+                  enum:
+                    - global
+                    - request
+                min_triggers:
+                  type: integer
+                  description: "Number of High_Latency_Events required before creating a block. Default: 1."
+                  minimum: 1
+                trigger_window_seconds:
+                  type: integer
+                  description: "Sliding time window in seconds for counting triggers. Required when min_triggers > 1."
+                  minimum: 1
+              additionalProperties: false
+              required:
+                - threshold_ms
+                - strategy
+                - max_attempts
+                - block_duration_seconds
+            backoff:
+              type: object
+              description: "Exponential backoff configuration. When omitted, no backoff delays are applied."
+              properties:
+                apply_to:
+                  type: string
+                  description: "REQUIRED. Determines when backoff delays are applied."
+                  enum:
+                    - same_model
+                    - same_provider
+                    - global
+                base_ms:
+                  type: integer
+                  description: "Base delay in milliseconds for exponential backoff. Default: 100."
+                  minimum: 1
+                max_ms:
+                  type: integer
+                  description: "Maximum delay in milliseconds for exponential backoff. Default: 5000."
+                  minimum: 1
+                jitter:
+                  type: boolean
+                  description: "Add random jitter to prevent thundering herd. Default: true."
+              additionalProperties: false
+              required:
+                - apply_to
+            retry_after_handling:
+              type: object
+              description: "Retry-After header handling customization. When omitted, Retry-After is honored with defaults (scope: model, apply_to: global, max_retry_after_seconds: 300)."
+              properties:
+                scope:
+                  type: string
+                  description: "What to block: model-level or provider-level. Default: model."
+                  enum:
+                    - model
+                    - provider
+                apply_to:
+                  type: string
+                  description: "Blocking scope: request-scoped or global. Default: global."
+                  enum:
+                    - request
+                    - global
+                max_retry_after_seconds:
+                  type: integer
+                  description: "Maximum Retry-After value honored in seconds. Default: 300."
+                  minimum: 1
+              additionalProperties: false
+            max_retry_duration_ms:
+              type: integer
+              description: "Maximum total time in milliseconds for all retry attempts combined. Timer starts on first retry."
+              minimum: 0
+          additionalProperties: false
       additionalProperties: false
       required:
         - model
diff --git a/crates/Cargo.lock b/crates/Cargo.lock
index c5819de9..27511878 100644
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@@ -293,7 +293,16 @@ version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
 dependencies = [
- "bit-vec",
+ "bit-vec 0.6.3",
+]
+
+[[package]]
+name = "bit-set"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
+dependencies = [
+ "bit-vec 0.8.0",
 ]
 
 [[package]]
@@ -302,6 +311,12 @@ version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
 
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bitflags"
 version = "2.11.0"
@@ -528,6 +543,7 @@ dependencies = [
  "hyper 1.9.0",
  "log",
  "pretty_assertions",
+ "proptest",
  "proxy-wasm",
  "rand 0.8.5",
  "serde",
@@ -928,7 +944,7 @@ version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
 dependencies = [
- "bit-set",
+ "bit-set 0.5.3",
  "regex",
 ]
 
@@ -2527,6 +2543,25 @@ dependencies = [
  "thiserror 1.0.69",
 ]
 
+[[package]]
+name = "proptest"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744"
+dependencies = [
+ "bit-set 0.8.0",
+ "bit-vec 0.8.0",
+ "bitflags",
+ "num-traits",
+ "rand 0.9.4",
+ "rand_chacha 0.9.0",
+ "rand_xorshift",
+ "regex-syntax",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
+]
+
 [[package]]
 name = "prost"
 version = "0.14.3"
@@ -2575,6 +2610,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
 [[package]]
 name = "quinn"
 version = "0.11.9"
@@ -2727,6 +2768,15 @@ version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
 
+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
+dependencies = [
+ "rand_core 0.9.5",
+]
+
 [[package]]
 name = "raw-cpuid"
 version = "11.6.0"
@@ -3056,6 +3106,18 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
 
+[[package]]
+name = "rusty-fork"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
+dependencies = [
+ "fnv",
+ "quick-error",
+ "tempfile",
+ "wait-timeout",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.23"
@@ -3984,6 +4046,12 @@ version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
 
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
 [[package]]
 name = "unicase"
 version = "2.9.0"
@@ -4133,6 +4201,15 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
 
+[[package]]
+name = "wait-timeout"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "want"
 version = "0.3.1"
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index dd2cba15..0a5e5651 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -36,3 +36,4 @@ tokio = { version = "1.44", features = ["sync", "time", "macros", "rt"] }
 hyper = { version = "1.0", features = ["full"] }
 bytes = "1.0"
 http-body-util = "0.1"
+proptest = "1.4"
diff --git a/crates/common/proptest-regressions/configuration.txt b/crates/common/proptest-regressions/configuration.txt
new file mode 100644
index 00000000..1382b74e
--- /dev/null
+++ b/crates/common/proptest-regressions/configuration.txt
@@ -0,0 +1,7 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc e6443c9611ecf84b57514e7d12084d62e6558989f663f1106d3cedd746a20bf3 # shrinks to include_on_status_codes = false, include_backoff = true, include_retry_after = false, include_on_timeout = false, include_on_high_latency = false
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 37492904..2fed0290 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -474,6 +474,225 @@ impl serde::Serialize for OrchestrationPreference {
     }
 }
 
+// ── Retry Policy Configuration Types ──────────────────────────────────────────
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum RetryStrategy {
+    SameModel,
+    SameProvider,
+    DifferentProvider,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BlockScope {
+    Model,
+    Provider,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyTo {
+    Global,
+    Request,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BackoffApplyTo {
+    SameModel,
+    SameProvider,
+    Global,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum LatencyMeasure {
+    Ttfb,
+    Total,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum StatusCodeEntry {
+    Single(u16),
+    Range(String),
+}
+
+impl StatusCodeEntry {
+    /// Expand a StatusCodeEntry into a list of individual status codes.
+    /// For Single, returns a vec with one element.
+    /// For Range (e.g. "502-504"), returns [502, 503, 504].
+    pub fn expand(&self) -> Result<Vec<u16>, String> {
+        match self {
+            StatusCodeEntry::Single(code) => Ok(vec![*code]),
+            StatusCodeEntry::Range(range_str) => {
+                let parts: Vec<&str> = range_str.split('-').collect();
+                if parts.len() != 2 {
+                    return Err(format!(
+                        "Invalid status code range format: '{}'. Expected 'start-end'.",
+                        range_str
+                    ));
+                }
+                let start: u16 = parts[0]
+                    .trim()
+                    .parse()
+                    .map_err(|_| format!("Invalid start in status code range: '{}'", parts[0]))?;
+                let end: u16 = parts[1]
+                    .trim()
+                    .parse()
+                    .map_err(|_| format!("Invalid end in status code range: '{}'", parts[1]))?;
+                if start > end {
+                    return Err(format!(
+                        "Status code range start ({}) must be <= end ({})",
+                        start, end
+                    ));
+                }
+                Ok((start..=end).collect())
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StatusCodeConfig {
+    pub codes: Vec<StatusCodeEntry>,
+    pub strategy: RetryStrategy,
+    pub max_attempts: u32,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TimeoutRetryConfig {
+    pub strategy: RetryStrategy,
+    pub max_attempts: u32,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct BackoffConfig {
+    pub apply_to: BackoffApplyTo,
+    #[serde(default = "default_base_ms")]
+    pub base_ms: u64,
+    #[serde(default = "default_max_ms")]
+    pub max_ms: u64,
+    #[serde(default = "default_jitter")]
+    pub jitter: bool,
+}
+
+fn default_base_ms() -> u64 {
+    100
+}
+fn default_max_ms() -> u64 {
+    5000
+}
+fn default_jitter() -> bool {
+    true
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RetryAfterHandlingConfig {
+    #[serde(default = "default_retry_after_scope")]
+    pub scope: BlockScope,
+    #[serde(default = "default_retry_after_apply_to")]
+    pub apply_to: ApplyTo,
+    #[serde(default = "default_max_retry_after_seconds")]
+    pub max_retry_after_seconds: u64,
+}
+
+fn default_retry_after_scope() -> BlockScope {
+    BlockScope::Model
+}
+fn default_retry_after_apply_to() -> ApplyTo {
+    ApplyTo::Global
+}
+fn default_max_retry_after_seconds() -> u64 {
+    300
+}
+
+impl Default for RetryAfterHandlingConfig {
+    fn default() -> Self {
+        Self {
+            scope: BlockScope::Model,
+            apply_to: ApplyTo::Global,
+            max_retry_after_seconds: 300,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct HighLatencyConfig {
+    pub threshold_ms: u64,
+    #[serde(default = "default_latency_measure")]
+    pub measure: LatencyMeasure,
+    #[serde(default = "default_min_triggers")]
+    pub min_triggers: u32,
+    pub trigger_window_seconds: Option<u64>,
+    pub strategy: RetryStrategy,
+    pub max_attempts: u32,
+    #[serde(default = "default_block_duration")]
+    pub block_duration_seconds: u64,
+    #[serde(default = "default_block_scope")]
+    pub scope: BlockScope,
+    #[serde(default = "default_high_latency_apply_to")]
+    pub apply_to: ApplyTo,
+}
+
+fn default_latency_measure() -> LatencyMeasure {
+    LatencyMeasure::Ttfb
+}
+fn default_min_triggers() -> u32 {
+    1
+}
+fn default_block_duration() -> u64 {
+    300
+}
+fn default_block_scope() -> BlockScope {
+    BlockScope::Model
+}
+fn default_high_latency_apply_to() -> ApplyTo {
+    ApplyTo::Global
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RetryPolicy {
+    #[serde(default)]
+    pub fallback_models: Vec<String>,
+    #[serde(default = "default_retry_strategy")]
+    pub default_strategy: RetryStrategy,
+    #[serde(default = "default_max_attempts")]
+    pub default_max_attempts: u32,
+    #[serde(default)]
+    pub on_status_codes: Vec<StatusCodeConfig>,
+    pub on_timeout: Option<TimeoutRetryConfig>,
+    pub on_high_latency: Option<HighLatencyConfig>,
+    pub backoff: Option<BackoffConfig>,
+    pub retry_after_handling: Option<RetryAfterHandlingConfig>,
+    pub max_retry_duration_ms: Option<u64>,
+}
+
+fn default_retry_strategy() -> RetryStrategy {
+    RetryStrategy::DifferentProvider
+}
+fn default_max_attempts() -> u32 {
+    2
+}
+
+impl RetryPolicy {
+    /// Get the effective Retry-After handling config.
+    /// Always returns a config when retry_policy exists (Retry-After is always-on).
+    pub fn effective_retry_after_config(&self) -> RetryAfterHandlingConfig {
+        self.retry_after_handling.clone().unwrap_or_default()
+    }
+}
+
+/// Extract provider prefix from a model identifier.
+/// e.g., "openai/gpt-4o" -> "openai"
+pub fn extract_provider(model_id: &str) -> &str {
+    model_id.split('/').next().unwrap_or(model_id)
+}
+
+// ── End Retry Policy Configuration Types ─────────────────────────────────────
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {
@@ -492,6 +711,8 @@ pub struct LlmProvider {
     pub internal: Option<bool>,
     pub passthrough_auth: Option<bool>,
     pub headers: Option<HashMap<String, String>>,
+    /// Retry policy configuration. When None, retry logic is disabled.
+    pub retry_policy: Option<RetryPolicy>,
 }
 
 pub trait IntoModels {
@@ -536,6 +757,7 @@ impl Default for LlmProvider {
             internal: None,
             passthrough_auth: None,
             headers: None,
+            retry_policy: None,
         }
     }
 }
@@ -654,6 +876,307 @@ mod test {
     use super::{IntoModels, LlmProvider, LlmProviderType};
     use crate::api::open_ai::ToolType;
 
+    use proptest::prelude::*;
+
+    // ── Proptest Strategies for Retry Config Types ─────────────────────────
+
+    fn arb_retry_strategy() -> impl Strategy<Value = super::RetryStrategy> {
+        prop_oneof![
+            Just(super::RetryStrategy::SameModel),
+            Just(super::RetryStrategy::SameProvider),
+            Just(super::RetryStrategy::DifferentProvider),
+        ]
+    }
+
+    fn arb_block_scope() -> impl Strategy<Value = super::BlockScope> {
+        prop_oneof![
+            Just(super::BlockScope::Model),
+            Just(super::BlockScope::Provider),
+        ]
+    }
+
+    fn arb_apply_to() -> impl Strategy<Value = super::ApplyTo> {
+        prop_oneof![Just(super::ApplyTo::Global), Just(super::ApplyTo::Request),]
+    }
+
+    fn arb_backoff_apply_to() -> impl Strategy<Value = super::BackoffApplyTo> {
+        prop_oneof![
+            Just(super::BackoffApplyTo::SameModel),
+            Just(super::BackoffApplyTo::SameProvider),
+            Just(super::BackoffApplyTo::Global),
+        ]
+    }
+
+    fn arb_latency_measure() -> impl Strategy<Value = super::LatencyMeasure> {
+        prop_oneof![
+            Just(super::LatencyMeasure::Ttfb),
+            Just(super::LatencyMeasure::Total),
+        ]
+    }
+
+    fn arb_status_code_entry() -> impl Strategy<Value = super::StatusCodeEntry> {
+        prop_oneof![
+            (100u16..=599u16).prop_map(super::StatusCodeEntry::Single),
+            (100u16..=599u16)
+                .prop_flat_map(|start| (Just(start), start..=599u16))
+                .prop_map(|(start, end)| super::StatusCodeEntry::Range(format!(
+                    "{}-{}",
+                    start, end
+                ))),
+        ]
+    }
+
+    fn arb_status_code_config() -> impl Strategy<Value = super::StatusCodeConfig> {
+        (
+            prop::collection::vec(arb_status_code_entry(), 1..=3),
+            arb_retry_strategy(),
+            1u32..=10u32,
+        )
+            .prop_map(|(codes, strategy, max_attempts)| super::StatusCodeConfig {
+                codes,
+                strategy,
+                max_attempts,
+            })
+    }
+
+    fn arb_timeout_retry_config() -> impl Strategy<Value = super::TimeoutRetryConfig> {
+        (arb_retry_strategy(), 1u32..=10u32).prop_map(|(strategy, max_attempts)| {
+            super::TimeoutRetryConfig {
+                strategy,
+                max_attempts,
+            }
+        })
+    }
+
+    fn arb_backoff_config() -> impl Strategy<Value = super::BackoffConfig> {
+        (arb_backoff_apply_to(), 1u64..=1000u64, prop::bool::ANY)
+            .prop_flat_map(|(apply_to, base_ms, jitter)| {
+                let max_ms_min = base_ms + 1;
+                (
+                    Just(apply_to),
+                    Just(base_ms),
+                    max_ms_min..=(base_ms + 50000),
+                    Just(jitter),
+                )
+            })
+            .prop_map(|(apply_to, base_ms, max_ms, jitter)| super::BackoffConfig {
+                apply_to,
+                base_ms,
+                max_ms,
+                jitter,
+            })
+    }
+
+    fn arb_retry_after_handling_config() -> impl Strategy<Value = super::RetryAfterHandlingConfig> {
+        (arb_block_scope(), arb_apply_to(), 1u64..=3600u64).prop_map(
+            |(scope, apply_to, max_retry_after_seconds)| super::RetryAfterHandlingConfig {
+                scope,
+                apply_to,
+                max_retry_after_seconds,
+            },
+        )
+    }
+
+    fn arb_high_latency_config() -> impl Strategy<Value = super::HighLatencyConfig> {
+        (
+            1u64..=60000u64,
+            arb_latency_measure(),
+            1u32..=10u32,
+            arb_retry_strategy(),
+            1u32..=10u32,
+            1u64..=3600u64,
+            arb_block_scope(),
+            arb_apply_to(),
+        )
+            .prop_map(
+                |(
+                    threshold_ms,
+                    measure,
+                    min_triggers,
+                    strategy,
+                    max_attempts,
+                    block_duration_seconds,
+                    scope,
+                    apply_to,
+                )| {
+                    let trigger_window_seconds = if min_triggers > 1 { Some(60u64) } else { None };
+                    super::HighLatencyConfig {
+                        threshold_ms,
+                        measure,
+                        min_triggers,
+                        trigger_window_seconds,
+                        strategy,
+                        max_attempts,
+                        block_duration_seconds,
+                        scope,
+                        apply_to,
+                    }
+                },
+            )
+    }
+
+    fn arb_retry_policy() -> impl Strategy<Value = super::RetryPolicy> {
+        (
+            prop::collection::vec("[a-z]{2,6}/[a-z0-9-]{3,10}", 0..=3),
+            arb_retry_strategy(),
+            1u32..=10u32,
+            prop::collection::vec(arb_status_code_config(), 0..=3),
+            prop::option::of(arb_timeout_retry_config()),
+            prop::option::of(arb_high_latency_config()),
+            prop::option::of(arb_backoff_config()),
+            prop::option::of(arb_retry_after_handling_config()),
+            prop::option::of(1u64..=120000u64),
+        )
+            .prop_map(
+                |(
+                    fallback_models,
+                    default_strategy,
+                    default_max_attempts,
+                    on_status_codes,
+                    on_timeout,
+                    on_high_latency,
+                    backoff,
+                    retry_after_handling,
+                    max_retry_duration_ms,
+                )| {
+                    super::RetryPolicy {
+                        fallback_models,
+                        default_strategy,
+                        default_max_attempts,
+                        on_status_codes,
+                        on_timeout,
+                        on_high_latency,
+                        backoff,
+                        retry_after_handling,
+                        max_retry_duration_ms,
+                    }
+                },
+            )
+    }
+
+    // ── Property Tests ─────────────────────────────────────────────────────
+
+    // Feature: retry-on-ratelimit, Property 1: Configuration Round-Trip Parsing
+    // **Validates: Requirements 1.2**
+    proptest! {
+        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))]
+
+        /// Property 1: Configuration Round-Trip Parsing
+        /// Generate arbitrary valid RetryPolicy structs, serialize to YAML,
+        /// re-parse, and assert equivalence.
+        #[test]
+        fn prop_retry_policy_round_trip(policy in arb_retry_policy()) {
+            let yaml = serde_yaml::to_string(&policy)
+                .expect("serialization should succeed");
+            let parsed: super::RetryPolicy = serde_yaml::from_str(&yaml)
+                .expect("deserialization should succeed");
+
+            // Direct structural equality — all types derive PartialEq
+            prop_assert_eq!(&policy, &parsed);
+        }
+
+    }
+
+    // Feature: retry-on-ratelimit, Property 2: Configuration Defaults Applied Correctly
+    // **Validates: Requirements 1.2**
+    proptest! {
+        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))]
+
+        /// Property 2: Configuration Defaults Applied Correctly
+        /// Generate RetryPolicy YAML with optional fields omitted, parse,
+        /// and assert correct defaults are applied.
+        #[test]
+        fn prop_retry_policy_defaults(
+            include_on_status_codes in prop::bool::ANY,
+            include_backoff in prop::bool::ANY,
+            include_retry_after in prop::bool::ANY,
+            include_on_timeout in prop::bool::ANY,
+            include_on_high_latency in prop::bool::ANY,
+        ) {
+            // Build a minimal YAML — RetryPolicy has serde defaults for all fields,
+            // so even an empty mapping is valid.
+            let mut parts: Vec<String> = Vec::new();
+
+            // When we include sections, only provide required sub-fields so
+            // we can verify the optional sub-fields get their defaults.
+            if include_on_status_codes {
+                parts.push("on_status_codes:\n  - codes: [429]\n    strategy: same_model\n    max_attempts: 2".to_string());
+            }
+            if include_backoff {
+                parts.push("backoff:\n  apply_to: global".to_string());
+            }
+            if include_retry_after {
+                parts.push("retry_after_handling:\n  scope: provider".to_string());
+            }
+            if include_on_timeout {
+                parts.push("on_timeout:\n  strategy: same_model\n  max_attempts: 1".to_string());
+            }
+            if include_on_high_latency {
+                parts.push("on_high_latency:\n  threshold_ms: 5000\n  strategy: different_provider\n  max_attempts: 2".to_string());
+            }
+
+            let yaml = if parts.is_empty() {
+                "{}".to_string()
+            } else {
+                parts.join("\n")
+            };
+
+            let parsed: super::RetryPolicy = serde_yaml::from_str(&yaml)
+                .expect("deserialization should succeed");
+
+            // Assert top-level defaults
+            prop_assert_eq!(parsed.default_strategy, super::RetryStrategy::DifferentProvider);
+            prop_assert_eq!(parsed.default_max_attempts, 2);
+            prop_assert!(parsed.fallback_models.is_empty());
+            prop_assert_eq!(parsed.max_retry_duration_ms, None);
+
+            // Assert on_status_codes defaults to empty vec
+            if !include_on_status_codes {
+                prop_assert!(parsed.on_status_codes.is_empty());
+            }
+
+            // Assert backoff defaults when present
+            if include_backoff {
+                let backoff = parsed.backoff.as_ref().unwrap();
+                prop_assert_eq!(backoff.base_ms, 100);
+                prop_assert_eq!(backoff.max_ms, 5000);
+                prop_assert_eq!(backoff.jitter, true);
+            } else {
+                prop_assert!(parsed.backoff.is_none());
+            }
+
+            // Assert retry_after_handling defaults when present
+            if include_retry_after {
+                let rah = parsed.retry_after_handling.as_ref().unwrap();
+                prop_assert_eq!(rah.scope, super::BlockScope::Provider); // explicitly set
+                prop_assert_eq!(rah.apply_to, super::ApplyTo::Global); // default
+                prop_assert_eq!(rah.max_retry_after_seconds, 300); // default
+            } else {
+                prop_assert!(parsed.retry_after_handling.is_none());
+            }
+
+            // Assert effective_retry_after_config always returns valid defaults
+            let effective = parsed.effective_retry_after_config();
+            if include_retry_after {
+                prop_assert_eq!(effective.scope, super::BlockScope::Provider);
+            } else {
+                prop_assert_eq!(effective.scope, super::BlockScope::Model);
+            }
+            prop_assert_eq!(effective.apply_to, super::ApplyTo::Global);
+            prop_assert_eq!(effective.max_retry_after_seconds, 300);
+
+            // Assert high latency defaults when present
+            if include_on_high_latency {
+                let hl = parsed.on_high_latency.as_ref().unwrap();
+                prop_assert_eq!(hl.measure, super::LatencyMeasure::Ttfb); // default
+                prop_assert_eq!(hl.min_triggers, 1); // default
+                prop_assert_eq!(hl.block_duration_seconds, 300); // default
+                prop_assert_eq!(hl.scope, super::BlockScope::Model); // default
+                prop_assert_eq!(hl.apply_to, super::ApplyTo::Global); // default
+            }
+        }
+    }
+
     #[test]
     fn test_deserialize_configuration() {
         let ref_config = fs::read_to_string(
@@ -735,6 +1258,60 @@ mod test {
         }
     }
 
+    // Feature: retry-on-ratelimit, Property 4: Status Code Range Expansion
+    // **Validates: Requirements 1.8**
+    proptest! {
+        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))]
+
+        /// Property 4: Status Code Range Expansion — degenerate range (start == end)
+        /// A range "N-N" should expand to a single-element vec containing N.
+        #[test]
+        fn prop_status_code_range_expansion(
+            code in 100u16..=599u16,
+        ) {
+            let range_str = format!("{}-{}", code, code);
+            let entry = super::StatusCodeEntry::Range(range_str);
+            let expanded = entry.expand().expect("expand should succeed for valid range");
+            prop_assert_eq!(expanded.len(), 1);
+            prop_assert_eq!(expanded[0], code);
+        }
+
+        /// Property 4: Status Code Range Expansion — Single variant
+        /// Generate arbitrary code (100..=599), expand, assert vec of length 1 containing that code.
+        #[test]
+        fn prop_status_code_single_expansion(code in 100u16..=599u16) {
+            let entry = super::StatusCodeEntry::Single(code);
+            let expanded = entry.expand().expect("expand should succeed for Single");
+            prop_assert_eq!(expanded.len(), 1);
+            prop_assert_eq!(expanded[0], code);
+        }
+    }
+
+    proptest! {
+        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(100))]
+
+        /// Property 4: Status Code Range Expansion — arbitrary start..=end range
+        /// Generate arbitrary valid range strings "start-end" (100 ≤ start ≤ end ≤ 599),
+        /// expand, and assert correct count and bounds.
+        #[test]
+        fn prop_status_code_range_expansion_full(
+            (start, end) in (100u16..=599u16).prop_flat_map(|s| (Just(s), s..=599u16))
+        ) {
+            let range_str = format!("{}-{}", start, end);
+            let entry = super::StatusCodeEntry::Range(range_str);
+            let expanded = entry.expand().expect("expand should succeed for valid range");
+
+            let expected_len = (end - start + 1) as usize;
+            prop_assert_eq!(expanded.len(), expected_len, "length should be end - start + 1");
+            prop_assert_eq!(*expanded.first().unwrap(), start, "first element should be start");
+            prop_assert_eq!(*expanded.last().unwrap(), end, "last element should be end");
+
+            for &code in &expanded {
+                prop_assert!(code >= start && code <= end, "all codes should be in [start, end]");
+            }
+        }
+    }
+
     #[test]
     fn test_into_models_filters_internal_providers() {
         let providers = vec![
@@ -762,7 +1339,6 @@ mod test {
         assert!(model_ids.contains(&"openai-gpt4".to_string()));
         assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
     }
-
     #[test]
     fn test_llm_provider_type_vercel_and_openrouter_roundtrip() {
         // Regression: brightstaff used to reject `provider_interface: vercel`
@@ -807,4 +1383,445 @@ disable_signals: false
         let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
         assert_eq!(overrides.disable_signals, None);
     }
+
+    // ── P0 Edge Case Tests: YAML Config Pattern Parsing ────────────────────
+
+    /// Helper to parse a RetryPolicy from a YAML string.
+    fn parse_retry_policy(yaml: &str) -> super::RetryPolicy {
+        serde_yaml::from_str(yaml).expect("YAML should parse into RetryPolicy")
+    }
+
+    #[test]
+    fn test_pattern1_multi_provider_failover_for_rate_limits() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(policy.fallback_models, vec!["anthropic/claude-3-5-sonnet"]);
+        assert_eq!(policy.on_status_codes.len(), 1);
+        assert_eq!(
+            policy.on_status_codes[0].strategy,
+            super::RetryStrategy::DifferentProvider
+        );
+        assert_eq!(policy.on_status_codes[0].max_attempts, 2);
+    }
+
+    #[test]
+    fn test_pattern2_same_provider_failover_with_model_downgrade() {
+        let yaml = r#"
+    fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_provider"
+        max_attempts: 2
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(policy.fallback_models.len(), 2);
+        assert_eq!(
+            policy.on_status_codes[0].strategy,
+            super::RetryStrategy::SameProvider
+        );
+    }
+
+    #[test]
+    fn test_pattern3_single_model_with_backoff_on_multiple_error_types() {
+        let yaml = r#"
+    fallback_models: []
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_model"
+        max_attempts: 3
+      - codes: [503]
+        strategy: "same_model"
+        max_attempts: 3
+    backoff:
+      apply_to: "same_model"
+      base_ms: 500
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert!(policy.fallback_models.is_empty());
+        assert_eq!(policy.on_status_codes.len(), 2);
+        let backoff = policy.backoff.unwrap();
+        assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel);
+        assert_eq!(backoff.base_ms, 500);
+        // max_ms defaults to 5000
+        assert_eq!(backoff.max_ms, 5000);
+    }
+
+    #[test]
+    fn test_pattern4_per_status_code_strategy_customization() {
+        let yaml = r#"
+    fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet]
+    default_strategy: "different_provider"
+    default_max_attempts: 2
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_provider"
+        max_attempts: 2
+      - codes: [502]
+        strategy: "different_provider"
+        max_attempts: 3
+      - codes: [503]
+        strategy: "same_model"
+        max_attempts: 2
+      - codes: [504]
+        strategy: "different_provider"
+        max_attempts: 2
+    on_timeout:
+      strategy: "different_provider"
+      max_attempts: 2
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(
+            policy.default_strategy,
+            super::RetryStrategy::DifferentProvider
+        );
+        assert_eq!(policy.default_max_attempts, 2);
+        assert_eq!(policy.on_status_codes.len(), 4);
+        assert_eq!(
+            policy.on_status_codes[2].strategy,
+            super::RetryStrategy::SameModel
+        );
+        let timeout = policy.on_timeout.unwrap();
+        assert_eq!(timeout.strategy, super::RetryStrategy::DifferentProvider);
+        assert_eq!(timeout.max_attempts, 2);
+    }
+
+    #[test]
+    fn test_pattern5_timeout_specific_configuration() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    default_strategy: "different_provider"
+    default_max_attempts: 2
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_provider"
+        max_attempts: 2
+    on_timeout:
+      strategy: "different_provider"
+      max_attempts: 3
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let timeout = policy.on_timeout.unwrap();
+        assert_eq!(timeout.max_attempts, 3);
+    }
+
+    #[test]
+    fn test_pattern6_no_retry_parses_as_empty() {
+        // Pattern 6: No retry_policy section. We test that an empty YAML
+        // object parses with all defaults.
+        let yaml = "{}";
+        let policy = parse_retry_policy(yaml);
+        assert!(policy.fallback_models.is_empty());
+        assert_eq!(
+            policy.default_strategy,
+            super::RetryStrategy::DifferentProvider
+        );
+        assert_eq!(policy.default_max_attempts, 2);
+        assert!(policy.on_status_codes.is_empty());
+        assert!(policy.on_timeout.is_none());
+        assert!(policy.backoff.is_none());
+        assert!(policy.max_retry_duration_ms.is_none());
+    }
+
+    #[test]
+    fn test_pattern7_backoff_only_for_same_model() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_model"
+        max_attempts: 2
+    backoff:
+      apply_to: "same_model"
+      base_ms: 100
+      max_ms: 5000
+      jitter: true
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let backoff = policy.backoff.unwrap();
+        assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel);
+        assert!(backoff.jitter);
+    }
+
+    #[test]
+    fn test_pattern8_backoff_for_same_provider() {
+        let yaml = r#"
+    fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_provider"
+        max_attempts: 2
+    backoff:
+      apply_to: "same_provider"
+      base_ms: 200
+      max_ms: 10000
+      jitter: true
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let backoff = policy.backoff.unwrap();
+        assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameProvider);
+        assert_eq!(backoff.base_ms, 200);
+        assert_eq!(backoff.max_ms, 10000);
+    }
+
+    #[test]
+    fn test_pattern9_global_backoff() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+    backoff:
+      apply_to: "global"
+      base_ms: 50
+      max_ms: 2000
+      jitter: true
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let backoff = policy.backoff.unwrap();
+        assert_eq!(backoff.apply_to, super::BackoffApplyTo::Global);
+        assert_eq!(backoff.base_ms, 50);
+        assert_eq!(backoff.max_ms, 2000);
+    }
+
+    #[test]
+    fn test_pattern10_deterministic_backoff_without_jitter() {
+        let yaml = r#"
+    fallback_models: []
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_model"
+        max_attempts: 3
+    backoff:
+      apply_to: "same_model"
+      base_ms: 1000
+      max_ms: 30000
+      jitter: false
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let backoff = policy.backoff.unwrap();
+        assert!(!backoff.jitter);
+        assert_eq!(backoff.base_ms, 1000);
+        assert_eq!(backoff.max_ms, 30000);
+    }
+
+    #[test]
+    fn test_pattern11_no_backoff_fast_failover() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert!(policy.backoff.is_none());
+    }
+
+    #[test]
+    fn test_pattern17_mixed_integer_and_range_codes() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    default_strategy: "different_provider"
+    default_max_attempts: 2
+    on_status_codes:
+      - codes: [429, "430-450", 526]
+        strategy: "same_provider"
+        max_attempts: 2
+      - codes: ["502-504"]
+        strategy: "different_provider"
+        max_attempts: 3
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(policy.on_status_codes.len(), 2);
+
+        // Verify first entry: 429 + range 430-450 + 526
+        let first = &policy.on_status_codes[0];
+        assert_eq!(first.codes.len(), 3);
+        let expanded: Vec<u16> = first
+            .codes
+            .iter()
+            .flat_map(|c| c.expand().unwrap())
+            .collect();
+        // 429 + (430..=450 = 21 codes) + 526 = 23 codes
+        assert_eq!(expanded.len(), 23);
+        assert!(expanded.contains(&429));
+        assert!(expanded.contains(&430));
+        assert!(expanded.contains(&450));
+        assert!(expanded.contains(&526));
+        assert!(!expanded.contains(&451));
+
+        // Verify second entry: range 502-504
+        let second = &policy.on_status_codes[1];
+        let expanded2: Vec<u16> = second
+            .codes
+            .iter()
+            .flat_map(|c| c.expand().unwrap())
+            .collect();
+        assert_eq!(expanded2, vec![502, 503, 504]);
+    }
+
+    #[test]
+    fn test_pattern12_model_level_retry_after_blocking() {
+        let yaml = r#"
+    fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+      - codes: [503]
+        strategy: "different_provider"
+        max_attempts: 2
+    retry_after_handling:
+      scope: "model"
+      apply_to: "global"
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(policy.fallback_models.len(), 2);
+        assert_eq!(policy.on_status_codes.len(), 2);
+        let rah = policy.retry_after_handling.unwrap();
+        assert_eq!(rah.scope, super::BlockScope::Model);
+        assert_eq!(rah.apply_to, super::ApplyTo::Global);
+        // max_retry_after_seconds defaults to 300
+        assert_eq!(rah.max_retry_after_seconds, 300);
+    }
+
+    #[test]
+    fn test_pattern13_provider_level_retry_after_blocking() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+      - codes: [503]
+        strategy: "different_provider"
+        max_attempts: 2
+      - codes: [502]
+        strategy: "different_provider"
+        max_attempts: 2
+    retry_after_handling:
+      scope: "provider"
+      apply_to: "global"
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(policy.on_status_codes.len(), 3);
+        let rah = policy.retry_after_handling.unwrap();
+        assert_eq!(rah.scope, super::BlockScope::Provider);
+        assert_eq!(rah.apply_to, super::ApplyTo::Global);
+        assert_eq!(rah.max_retry_after_seconds, 300);
+    }
+
+    #[test]
+    fn test_pattern14_request_level_retry_after() {
+        let yaml = r#"
+    fallback_models: [anthropic/claude-3-5-sonnet]
+    on_status_codes:
+      - codes: [429]
+        strategy: "different_provider"
+        max_attempts: 2
+      - codes: [503]
+        strategy: "different_provider"
+        max_attempts: 2
+    retry_after_handling:
+      scope: "model"
+      apply_to: "request"
+    "#;
+        let policy = parse_retry_policy(yaml);
+        let rah = policy.retry_after_handling.unwrap();
+        assert_eq!(rah.scope, super::BlockScope::Model);
+        assert_eq!(rah.apply_to, super::ApplyTo::Request);
+        assert_eq!(rah.max_retry_after_seconds, 300);
+    }
+
+    #[test]
+    fn test_pattern15_no_custom_retry_after_config_defaults_plus_backoff() {
+        let yaml = r#"
+    fallback_models: []
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_model"
+        max_attempts: 3
+      - codes: [503]
+        strategy: "same_model"
+        max_attempts: 3
+    backoff:
+      apply_to: "same_model"
+      base_ms: 1000
+      max_ms: 30000
+      jitter: true
+    "#;
+        let policy = parse_retry_policy(yaml);
+        // No retry_after_handling section → None
+        assert!(policy.retry_after_handling.is_none());
+        // But effective config should return defaults
+        let effective = policy.effective_retry_after_config();
+        assert_eq!(effective.scope, super::BlockScope::Model);
+        assert_eq!(effective.apply_to, super::ApplyTo::Global);
+        assert_eq!(effective.max_retry_after_seconds, 300);
+        // Backoff is present
+        let backoff = policy.backoff.unwrap();
+        assert_eq!(backoff.apply_to, super::BackoffApplyTo::SameModel);
+        assert_eq!(backoff.base_ms, 1000);
+        assert_eq!(backoff.max_ms, 30000);
+        assert!(backoff.jitter);
+    }
+
+    #[test]
+    fn test_pattern16_fallback_models_list_for_targeted_failover() {
+        let yaml = r#"
+    fallback_models: [openai/gpt-4o-mini, anthropic/claude-3-5-sonnet, anthropic/claude-3-opus]
+    default_strategy: "different_provider"
+    default_max_attempts: 2
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_provider"
+        max_attempts: 2
+    "#;
+        let policy = parse_retry_policy(yaml);
+        assert_eq!(
+            policy.fallback_models,
+            vec![
+                "openai/gpt-4o-mini",
+                "anthropic/claude-3-5-sonnet",
+                "anthropic/claude-3-opus",
+            ]
+        );
+        assert_eq!(
+            policy.default_strategy,
+            super::RetryStrategy::DifferentProvider
+        );
+        assert_eq!(policy.default_max_attempts, 2);
+        assert_eq!(policy.on_status_codes.len(), 1);
+        assert_eq!(
+            policy.on_status_codes[0].strategy,
+            super::RetryStrategy::SameProvider
+        );
+    }
+
+    #[test]
+    fn test_backoff_without_apply_to_fails_deserialization() {
+        // backoff.apply_to is a required field (no serde default), so YAML
+        // without it should fail to deserialize.
+        let yaml = r#"
+    on_status_codes:
+      - codes: [429]
+        strategy: "same_model"
+        max_attempts: 2
+    backoff:
+      base_ms: 100
+      max_ms: 5000
+    "#;
+        let result: Result<super::RetryPolicy, _> = serde_yaml::from_str(yaml);
+        assert!(
+            result.is_err(),
+            "backoff without apply_to should fail deserialization"
+        );
+    }
 }
diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs
index b4355a2f..e369f669 100644
--- a/crates/common/src/llm_providers.rs
+++ b/crates/common/src/llm_providers.rs
@@ -278,6 +278,7 @@ mod tests {
             stream: None,
             passthrough_auth: None,
             headers: None,
+            retry_policy: None,
         }
     }