diff --git a/crates/common/src/retry/error_response.rs b/crates/common/src/retry/error_response.rs new file mode 100644 index 00000000..c27b09f4 --- /dev/null +++ b/crates/common/src/retry/error_response.rs @@ -0,0 +1,611 @@ +use bytes::Bytes; +use http_body_util::Full; +use hyper::header::HeaderValue; +use hyper::Response; +use serde_json::json; + +use super::{AttemptErrorType, RetryExhaustedError}; + +/// Build an HTTP response from a `RetryExhaustedError`. +/// +/// The response body is a JSON object matching the design's error response format. +/// The HTTP status code is derived from the most recent attempt's error: +/// - For `HttpError`: the upstream status code +/// - For `Timeout` or `HighLatency`: 504 Gateway Timeout +/// +/// The `request_id` is preserved in the `x-request-id` response header. +/// +/// Optional fields `observed_max_retry_after_seconds` and +/// `shortest_remaining_block_seconds` are included only when their +/// corresponding values are `Some`. +pub fn build_error_response( + error: &RetryExhaustedError, + request_id: &str, +) -> Response> { + let status_code = determine_status_code(error); + + let attempts_json: Vec = error + .attempts + .iter() + .map(|a| { + let error_type_str = match &a.error_type { + AttemptErrorType::HttpError { status_code, .. } => { + format!("http_{}", status_code) + } + AttemptErrorType::Timeout { duration_ms } => { + format!("timeout_{}ms", duration_ms) + } + AttemptErrorType::HighLatency { + measured_ms, + threshold_ms, + } => { + format!( + "high_latency_{}ms_threshold_{}ms", + measured_ms, threshold_ms + ) + } + }; + json!({ + "model": a.model_id, + "error_type": error_type_str, + "attempt": a.attempt_number, + }) + }) + .collect(); + + let message = build_message(error); + + let mut error_obj = serde_json::Map::new(); + error_obj.insert("message".to_string(), json!(message)); + error_obj.insert("type".to_string(), json!("retry_exhausted")); + error_obj.insert("attempts".to_string(), json!(attempts_json)); + error_obj.insert("total_attempts".to_string(), json!(error.attempts.len())); + + if let Some(max_ra) = error.max_retry_after_seconds { + error_obj.insert( + "observed_max_retry_after_seconds".to_string(), + json!(max_ra), + ); + } + + if let Some(shortest) = error.shortest_remaining_block_seconds { + error_obj.insert( + "shortest_remaining_block_seconds".to_string(), + json!(shortest), + ); + } + + error_obj.insert( + "retry_budget_exhausted".to_string(), + json!(error.retry_budget_exhausted), + ); + + let body_json = json!({ "error": error_obj }); + let body_bytes = serde_json::to_vec(&body_json).unwrap_or_default(); + + let mut response = Response::builder() + .status(status_code) + .header("content-type", "application/json") + .body(Full::new(Bytes::from(body_bytes))) + .unwrap(); + + if let Ok(val) = HeaderValue::from_str(request_id) { + response.headers_mut().insert("x-request-id", val); + } + + response +} + +/// Determine the HTTP status code from the most recent attempt error. +/// Returns 504 for timeouts and high latency exhaustion, otherwise the +/// upstream HTTP status code. Falls back to 502 if no attempts exist. +fn determine_status_code(error: &RetryExhaustedError) -> u16 { + match error.attempts.last() { + Some(last) => match &last.error_type { + AttemptErrorType::HttpError { status_code, .. } => *status_code, + AttemptErrorType::Timeout { .. } => 504, + AttemptErrorType::HighLatency { .. } => 504, + }, + None => 502, + } +} + +/// Build a human-readable message describing the exhaustion cause. +fn build_message(error: &RetryExhaustedError) -> String { + if error.retry_budget_exhausted { + return "All retry attempts exhausted: retry budget exceeded".to_string(); + } + + match error.attempts.last() { + Some(last) => match &last.error_type { + AttemptErrorType::Timeout { .. } => { + "All retry attempts exhausted: upstream request timed out".to_string() + } + AttemptErrorType::HighLatency { .. } => { + "All retry attempts exhausted: upstream high latency detected".to_string() + } + _ => "All retry attempts exhausted".to_string(), + }, + None => "All retry attempts exhausted".to_string(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::retry::{AttemptError, AttemptErrorType, RetryExhaustedError}; + use http_body_util::BodyExt; + use proptest::prelude::*; + + /// Helper to extract the JSON body from a response. + async fn response_json(resp: Response>) -> serde_json::Value { + let body = resp.into_body().collect().await.unwrap().to_bytes(); + serde_json::from_slice(&body).unwrap() + } + + #[tokio::test] + async fn test_basic_http_error_response() { + let error = RetryExhaustedError { + attempts: vec![ + AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 429, + body: b"rate limited".to_vec(), + }, + attempt_number: 1, + }, + AttemptError { + model_id: "anthropic/claude-3-5-sonnet".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 503, + body: b"unavailable".to_vec(), + }, + attempt_number: 2, + }, + ], + max_retry_after_seconds: Some(30), + shortest_remaining_block_seconds: Some(12), + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "req-123"); + assert_eq!(resp.status().as_u16(), 503); // most recent error + assert_eq!( + resp.headers() + .get("x-request-id") + .unwrap() + .to_str() + .unwrap(), + "req-123" + ); + assert_eq!( + resp.headers() + .get("content-type") + .unwrap() + .to_str() + .unwrap(), + "application/json" + ); + + let json = response_json(resp).await; + let err = &json["error"]; + assert_eq!(err["type"], "retry_exhausted"); + assert_eq!(err["total_attempts"], 2); + assert_eq!(err["observed_max_retry_after_seconds"], 30); + assert_eq!(err["shortest_remaining_block_seconds"], 12); + assert_eq!(err["retry_budget_exhausted"], false); + + let attempts = err["attempts"].as_array().unwrap(); + assert_eq!(attempts.len(), 2); + assert_eq!(attempts[0]["model"], "openai/gpt-4o"); + assert_eq!(attempts[0]["error_type"], "http_429"); + assert_eq!(attempts[0]["attempt"], 1); + assert_eq!(attempts[1]["model"], "anthropic/claude-3-5-sonnet"); + assert_eq!(attempts[1]["error_type"], "http_503"); + assert_eq!(attempts[1]["attempt"], 2); + } + + #[tokio::test] + async fn test_timeout_returns_504() { + let error = RetryExhaustedError { + attempts: vec![AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::Timeout { duration_ms: 30000 }, + attempt_number: 1, + }], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "req-timeout"); + assert_eq!(resp.status().as_u16(), 504); + + let json = response_json(resp).await; + let err = &json["error"]; + assert_eq!(err["attempts"][0]["error_type"], "timeout_30000ms"); + assert!(err["message"].as_str().unwrap().contains("timed out")); + } + + #[tokio::test] + async fn test_high_latency_returns_504() { + let error = RetryExhaustedError { + attempts: vec![AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::HighLatency { + measured_ms: 8000, + threshold_ms: 5000, + }, + attempt_number: 1, + }], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "req-latency"); + assert_eq!(resp.status().as_u16(), 504); + + let json = response_json(resp).await; + let err = &json["error"]; + assert_eq!( + err["attempts"][0]["error_type"], + "high_latency_8000ms_threshold_5000ms" + ); + assert!(err["message"].as_str().unwrap().contains("high latency")); + } + + #[tokio::test] + async fn test_optional_fields_omitted_when_none() { + let error = RetryExhaustedError { + attempts: vec![AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 429, + body: vec![], + }, + attempt_number: 1, + }], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "req-456"); + let json = response_json(resp).await; + let err = &json["error"]; + + // These fields should not be present + assert!(err.get("observed_max_retry_after_seconds").is_none()); + assert!(err.get("shortest_remaining_block_seconds").is_none()); + + // These should always be present + assert!(err.get("retry_budget_exhausted").is_some()); + assert!(err.get("total_attempts").is_some()); + assert!(err.get("type").is_some()); + assert!(err.get("message").is_some()); + assert!(err.get("attempts").is_some()); + } + + #[tokio::test] + async fn test_retry_budget_exhausted_message() { + let error = RetryExhaustedError { + attempts: vec![AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 429, + body: vec![], + }, + attempt_number: 1, + }], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: true, + }; + + let resp = build_error_response(&error, "req-budget"); + let json = response_json(resp).await; + let err = &json["error"]; + assert_eq!(err["retry_budget_exhausted"], true); + assert!(err["message"].as_str().unwrap().contains("budget exceeded")); + } + + #[tokio::test] + async fn test_empty_attempts_returns_502() { + let error = RetryExhaustedError { + attempts: vec![], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "req-empty"); + assert_eq!(resp.status().as_u16(), 502); + + let json = response_json(resp).await; + assert_eq!(json["error"]["total_attempts"], 0); + assert_eq!(json["error"]["attempts"].as_array().unwrap().len(), 0); + } + + #[tokio::test] + async fn test_request_id_preserved_in_header() { + let error = RetryExhaustedError { + attempts: vec![AttemptError { + model_id: "m".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 500, + body: vec![], + }, + attempt_number: 1, + }], + max_retry_after_seconds: None, + shortest_remaining_block_seconds: None, + retry_budget_exhausted: false, + }; + + let resp = build_error_response(&error, "unique-request-id-abc-123"); + assert_eq!( + resp.headers() + .get("x-request-id") + .unwrap() + .to_str() + .unwrap(), + "unique-request-id-abc-123" + ); + } + + #[tokio::test] + async fn test_mixed_error_types_in_attempts() { + let error = RetryExhaustedError { + attempts: vec![ + AttemptError { + model_id: "openai/gpt-4o".to_string(), + error_type: AttemptErrorType::HttpError { + status_code: 429, + body: vec![], + }, + attempt_number: 1, + }, + AttemptError { + model_id: "anthropic/claude".to_string(), + error_type: AttemptErrorType::Timeout { duration_ms: 5000 }, + attempt_number: 2, + }, + AttemptError { + model_id: "gemini/pro".to_string(), + error_type: AttemptErrorType::HighLatency { + measured_ms: 10000, + threshold_ms: 3000, + }, + attempt_number: 3, + }, + ], + max_retry_after_seconds: Some(60), + shortest_remaining_block_seconds: Some(5), + retry_budget_exhausted: false, + }; + + // Last attempt is HighLatency → 504 + let resp = build_error_response(&error, "req-mixed"); + assert_eq!(resp.status().as_u16(), 504); + + let json = response_json(resp).await; + let err = &json["error"]; + assert_eq!(err["total_attempts"], 3); + assert_eq!(err["observed_max_retry_after_seconds"], 60); + assert_eq!(err["shortest_remaining_block_seconds"], 5); + + let attempts = err["attempts"].as_array().unwrap(); + assert_eq!(attempts[0]["error_type"], "http_429"); + assert_eq!(attempts[1]["error_type"], "timeout_5000ms"); + assert_eq!( + attempts[2]["error_type"], + "high_latency_10000ms_threshold_3000ms" + ); + } + + // ── Proptest strategies ──────────────────────────────────────────────── + + /// Generate an arbitrary AttemptErrorType. + fn arb_attempt_error_type() -> impl Strategy { + prop_oneof![ + ( + 100u16..=599u16, + proptest::collection::vec(any::(), 0..32) + ) + .prop_map(|(status_code, body)| AttemptErrorType::HttpError { status_code, body }), + (1u64..=120_000u64).prop_map(|duration_ms| AttemptErrorType::Timeout { duration_ms }), + (1u64..=120_000u64, 1u64..=120_000u64).prop_map(|(measured_ms, threshold_ms)| { + AttemptErrorType::HighLatency { + measured_ms, + threshold_ms, + } + }), + ] + } + + /// Generate an arbitrary AttemptError with a model_id from a small set of + /// realistic provider/model identifiers. + fn arb_attempt_error() -> impl Strategy { + let model_ids = prop_oneof![ + Just("openai/gpt-4o".to_string()), + Just("openai/gpt-4o-mini".to_string()), + Just("anthropic/claude-3-5-sonnet".to_string()), + Just("gemini/pro".to_string()), + Just("azure/gpt-4o".to_string()), + ]; + (model_ids, arb_attempt_error_type(), 1u32..=10u32).prop_map( + |(model_id, error_type, attempt_number)| AttemptError { + model_id, + error_type, + attempt_number, + }, + ) + } + + /// Generate an arbitrary RetryExhaustedError with 1..=8 attempts. + fn arb_retry_exhausted_error() -> impl Strategy { + ( + proptest::collection::vec(arb_attempt_error(), 1..=8), + proptest::option::of(1u64..=600u64), + proptest::option::of(1u64..=600u64), + any::(), + ) + .prop_map( + |( + attempts, + max_retry_after_seconds, + shortest_remaining_block_seconds, + retry_budget_exhausted, + )| { + RetryExhaustedError { + attempts, + max_retry_after_seconds, + shortest_remaining_block_seconds, + retry_budget_exhausted, + } + }, + ) + } + + /// Generate an arbitrary request_id (non-empty ASCII string valid for HTTP headers). + fn arb_request_id() -> impl Strategy { + "[a-zA-Z0-9_-]{1,64}" + } + + // Feature: retry-on-ratelimit, Property 21: Error Response Contains Attempt Details + // **Validates: Requirements 10.4, 10.5, 10.7** + proptest! { + #![proptest_config(ProptestConfig::with_cases(100))] + + /// Property 21: For any exhausted retry sequence, the error response + /// must include all attempted model identifiers and their error types, + /// and must preserve the original request_id. + #[test] + fn prop_error_response_contains_attempt_details( + error in arb_retry_exhausted_error(), + request_id in arb_request_id(), + ) { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rt.block_on(async { + let resp = build_error_response(&error, &request_id); + + // request_id preserved in x-request-id header + let header_val = resp.headers().get("x-request-id") + .expect("x-request-id header must be present"); + prop_assert_eq!(header_val.to_str().unwrap(), request_id.as_str()); + + // Content-Type is application/json + let ct = resp.headers().get("content-type") + .expect("content-type header must be present"); + prop_assert_eq!(ct.to_str().unwrap(), "application/json"); + + // Parse JSON body + let body = resp.into_body().collect().await.unwrap().to_bytes(); + let json: serde_json::Value = serde_json::from_slice(&body) + .expect("response body must be valid JSON"); + + let err_obj = &json["error"]; + + // type is always "retry_exhausted" + prop_assert_eq!(err_obj["type"].as_str().unwrap(), "retry_exhausted"); + + // total_attempts matches input + prop_assert_eq!( + err_obj["total_attempts"].as_u64().unwrap(), + error.attempts.len() as u64 + ); + + // retry_budget_exhausted matches input + prop_assert_eq!( + err_obj["retry_budget_exhausted"].as_bool().unwrap(), + error.retry_budget_exhausted + ); + + // attempts array has correct length + let attempts_arr = err_obj["attempts"].as_array() + .expect("attempts must be an array"); + prop_assert_eq!(attempts_arr.len(), error.attempts.len()); + + // Every attempt's model_id and error_type are present and correct + for (i, attempt) in error.attempts.iter().enumerate() { + let json_attempt = &attempts_arr[i]; + + // model_id preserved + prop_assert_eq!( + json_attempt["model"].as_str().unwrap(), + attempt.model_id.as_str() + ); + + // attempt_number preserved + prop_assert_eq!( + json_attempt["attempt"].as_u64().unwrap(), + attempt.attempt_number as u64 + ); + + // error_type string matches the variant + let error_type_str = json_attempt["error_type"].as_str().unwrap(); + match &attempt.error_type { + AttemptErrorType::HttpError { status_code, .. } => { + prop_assert_eq!( + error_type_str, + &format!("http_{}", status_code) + ); + } + AttemptErrorType::Timeout { duration_ms } => { + prop_assert_eq!( + error_type_str, + &format!("timeout_{}ms", duration_ms) + ); + } + AttemptErrorType::HighLatency { measured_ms, threshold_ms } => { + prop_assert_eq!( + error_type_str, + &format!("high_latency_{}ms_threshold_{}ms", measured_ms, threshold_ms) + ); + } + } + } + + // Optional fields: observed_max_retry_after_seconds + match error.max_retry_after_seconds { + Some(v) => { + prop_assert_eq!( + err_obj["observed_max_retry_after_seconds"].as_u64().unwrap(), + v + ); + } + None => { + prop_assert!(err_obj.get("observed_max_retry_after_seconds").is_none() + || err_obj["observed_max_retry_after_seconds"].is_null()); + } + } + + // Optional fields: shortest_remaining_block_seconds + match error.shortest_remaining_block_seconds { + Some(v) => { + prop_assert_eq!( + err_obj["shortest_remaining_block_seconds"].as_u64().unwrap(), + v + ); + } + None => { + prop_assert!(err_obj.get("shortest_remaining_block_seconds").is_none() + || err_obj["shortest_remaining_block_seconds"].is_null()); + } + } + + // message is a non-empty string + let message = err_obj["message"].as_str() + .expect("message must be a string"); + prop_assert!(!message.is_empty()); + + Ok(()) + })?; + } + } +}