fix: surface real upstream error messages from orchestrator HTTP client

`post_and_extract_content` was unconditionally deserializing the upstream response body as a `ChatCompletionsResponse`, which meant 4xx/5xx error bodies (OpenAI-style `{"error": {...}}` envelopes) failed with confusing messages like `missing field 'id' at line 1 column 391`. The real upstream message (e.g. "This model's maximum context length is 32768 tokens...") only appeared once as a warn log and then got buried in the generic "Failed to parse JSON response" path. Now we: - Check the HTTP status before attempting to parse the success body. - On non-2xx, extract a human-readable message from the OpenAI-style error envelope (or fall back to a UTF-8-safe truncated raw body). - Return a dedicated `HttpError::Upstream { status, message }` variant so callers can log / surface / retry based on the real status code. - Truncate raw bodies in warn logs to 512 bytes (UTF-8-safe) to avoid flooding logs with oversized JSON or HTML error pages.
2026-06-02 14:35:14 +02:00 · 2026-04-17 18:41:15 -07:00 · 2026-04-17 18:41:15 -07:00 · c90b699c90
commit c90b699c90
parent 321c28da37
1 changed files with 129 additions and 4 deletions
--- a/crates/brightstaff/src/router/http.rs
+++ b/crates/brightstaff/src/router/http.rs
@ -1,8 +1,14 @@
 use hermesllm::apis::openai::ChatCompletionsResponse;
 use hyper::header;
 use serde::Deserialize;
 use thiserror::Error;
 use tracing::warn;
 /// Max bytes of raw upstream body we include in a log message or error text
 /// when the body is not a recognizable error envelope. Keeps logs from being
 /// flooded by huge HTML error pages.
 const RAW_BODY_LOG_LIMIT: usize = 512;
 #[derive(Debug, Error)]
 pub enum HttpError {
    #[error("Failed to send request: {0}")]
@ -10,13 +16,64 @@ pub enum HttpError {
    #[error("Failed to parse JSON response: {0}")]
    Json(serde_json::Error, String),
    #[error("Upstream returned {status}: {message}")]
    Upstream { status: u16, message: String },
 }
 /// Shape of an OpenAI-style error response body, e.g.
 /// `{"error": {"message": "...", "type": "...", "param": "...", "code": ...}}`.
 #[derive(Debug, Deserialize)]
 struct UpstreamErrorEnvelope {
    error: UpstreamErrorBody,
 }
 #[derive(Debug, Deserialize)]
 struct UpstreamErrorBody {
    message: String,
    #[serde(default, rename = "type")]
    err_type: Option<String>,
    #[serde(default)]
    param: Option<String>,
 }
 /// Extract a human-readable error message from an upstream response body.
 /// Tries to parse an OpenAI-style `{"error": {"message": ...}}` envelope; if
 /// that fails, falls back to the first `RAW_BODY_LOG_LIMIT` bytes of the raw
 /// body (UTF-8 safe).
 fn extract_upstream_error_message(body: &str) -> String {
    if let Ok(env) = serde_json::from_str::<UpstreamErrorEnvelope>(body) {
        let mut msg = env.error.message;
        if let Some(param) = env.error.param {
            msg.push_str(&format!(" (param={param})"));
        }
        if let Some(err_type) = env.error.err_type {
            msg.push_str(&format!(" [type={err_type}]"));
        }
        return msg;
    }
    truncate_for_log(body).to_string()
 }
 fn truncate_for_log(s: &str) -> &str {
    if s.len() <= RAW_BODY_LOG_LIMIT {
        return s;
    }
    let mut end = RAW_BODY_LOG_LIMIT;
    while end > 0 && !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
 }
 /// Sends a POST request to the given URL and extracts the text content
 /// from the first choice of the `ChatCompletionsResponse`.
 ///
-/// Returns `Some((content, elapsed))` on success, or `None` if the response
+/// Returns `Some((content, elapsed))` on success, `None` if the response
-/// had no choices or the first choice had no content.
+/// had no choices or the first choice had no content. Returns
 /// `HttpError::Upstream` for any non-2xx status, carrying a message
 /// extracted from the OpenAI-style error envelope (or a truncated raw body
 /// if the body is not in that shape).
 pub async fn post_and_extract_content(
    client: &reqwest::Client,
    url: &str,
@ -26,17 +83,36 @@ pub async fn post_and_extract_content(
    let start_time = std::time::Instant::now();
    let res = client.post(url).headers(headers).body(body).send().await?;
    let status = res.status();
    let body = res.text().await?;
    let elapsed = start_time.elapsed();
    if !status.is_success() {
        let message = extract_upstream_error_message(&body);
        warn!(
            status = status.as_u16(),
            message = %message,
            body_size = body.len(),
            "upstream returned error response"
        );
        return Err(HttpError::Upstream {
            status: status.as_u16(),
            message,
        });
    }
    let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
-        warn!(error = %err, body = %body, "failed to parse json response");
+        warn!(
            error = %err,
            body = %truncate_for_log(&body),
            "failed to parse json response",
        );
        HttpError::Json(err, format!("Failed to parse JSON: {}", body))
    })?;
    if response.choices.is_empty() {
-        warn!(body = %body, "no choices in response");
+        warn!(body = %truncate_for_log(&body), "no choices in response");
        return Ok(None);
    }
@ -46,3 +122,52 @@ pub async fn post_and_extract_content(
        .as_ref()
        .map(|c| (c.clone(), elapsed)))
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn extracts_message_from_openai_style_error_envelope() {
        let body = r#"{"error":{"code":400,"message":"This model's maximum context length is 32768 tokens. However, you requested 0 output tokens and your prompt contains at least 32769 input tokens, for a total of at least 32769 tokens.","param":"input_tokens","type":"BadRequestError"}}"#;
        let msg = extract_upstream_error_message(body);
        assert!(
            msg.starts_with("This model's maximum context length is 32768 tokens."),
            "unexpected message: {msg}"
        );
        assert!(msg.contains("(param=input_tokens)"));
        assert!(msg.contains("[type=BadRequestError]"));
    }
    #[test]
    fn extracts_message_without_optional_fields() {
        let body = r#"{"error":{"message":"something broke"}}"#;
        let msg = extract_upstream_error_message(body);
        assert_eq!(msg, "something broke");
    }
    #[test]
    fn falls_back_to_raw_body_when_not_error_envelope() {
        let body = "<html><body>502 Bad Gateway</body></html>";
        let msg = extract_upstream_error_message(body);
        assert_eq!(msg, body);
    }
    #[test]
    fn truncates_non_envelope_bodies_in_logs() {
        let body = "x".repeat(RAW_BODY_LOG_LIMIT * 3);
        let msg = extract_upstream_error_message(&body);
        assert_eq!(msg.len(), RAW_BODY_LOG_LIMIT);
    }
    #[test]
    fn truncate_for_log_respects_utf8_boundaries() {
        // 2-byte characters; picking a length that would split mid-char.
        let body = "é".repeat(RAW_BODY_LOG_LIMIT);
        let out = truncate_for_log(&body);
        // Should be a valid &str (implicit — would panic if we returned
        // a non-boundary slice) and at most RAW_BODY_LOG_LIMIT bytes.
        assert!(out.len() <= RAW_BODY_LOG_LIMIT);
        assert!(out.chars().all(|c| c == 'é'));
    }
 }