From c90b699c903447040484315c79008ff102aee487 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Fri, 17 Apr 2026 18:41:15 -0700
Subject: [PATCH] fix: surface real upstream error messages from orchestrator
 HTTP client

`post_and_extract_content` was unconditionally deserializing the upstream
response body as a `ChatCompletionsResponse`, which meant 4xx/5xx error
bodies (OpenAI-style `{"error": {...}}` envelopes) failed with confusing
messages like `missing field 'id' at line 1 column 391`. The real
upstream message (e.g. "This model's maximum context length is 32768
tokens...") only appeared once as a warn log and then got buried in the
generic "Failed to parse JSON response" path.

Now we:
- Check the HTTP status before attempting to parse the success body.
- On non-2xx, extract a human-readable message from the OpenAI-style
  error envelope (or fall back to a UTF-8-safe truncated raw body).
- Return a dedicated `HttpError::Upstream { status, message }` variant
  so callers can log / surface / retry based on the real status code.
- Truncate raw bodies in warn logs to 512 bytes (UTF-8-safe) to avoid
  flooding logs with oversized JSON or HTML error pages.
---
 crates/brightstaff/src/router/http.rs | 133 +++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 4 deletions(-)
diff --git a/crates/brightstaff/src/router/http.rs b/crates/brightstaff/src/router/http.rs
index ad1b711c..e1f2be1e 100644
--- a/crates/brightstaff/src/router/http.rs
+++ b/crates/brightstaff/src/router/http.rs
@@ -1,8 +1,14 @@
 use hermesllm::apis::openai::ChatCompletionsResponse;
 use hyper::header;
+use serde::Deserialize;
 use thiserror::Error;
 use tracing::warn;
 
+/// Max bytes of raw upstream body we include in a log message or error text
+/// when the body is not a recognizable error envelope. Keeps logs from being
+/// flooded by huge HTML error pages.
+const RAW_BODY_LOG_LIMIT: usize = 512;
+
 #[derive(Debug, Error)]
 pub enum HttpError {
     #[error("Failed to send request: {0}")]
@@ -10,13 +16,64 @@ pub enum HttpError {
 
     #[error("Failed to parse JSON response: {0}")]
     Json(serde_json::Error, String),
+
+    #[error("Upstream returned {status}: {message}")]
+    Upstream { status: u16, message: String },
+}
+
+/// Shape of an OpenAI-style error response body, e.g.
+/// `{"error": {"message": "...", "type": "...", "param": "...", "code": ...}}`.
+#[derive(Debug, Deserialize)]
+struct UpstreamErrorEnvelope {
+    error: UpstreamErrorBody,
+}
+
+#[derive(Debug, Deserialize)]
+struct UpstreamErrorBody {
+    message: String,
+    #[serde(default, rename = "type")]
+    err_type: Option<String>,
+    #[serde(default)]
+    param: Option<String>,
+}
+
+/// Extract a human-readable error message from an upstream response body.
+/// Tries to parse an OpenAI-style `{"error": {"message": ...}}` envelope; if
+/// that fails, falls back to the first `RAW_BODY_LOG_LIMIT` bytes of the raw
+/// body (UTF-8 safe).
+fn extract_upstream_error_message(body: &str) -> String {
+    if let Ok(env) = serde_json::from_str::<UpstreamErrorEnvelope>(body) {
+        let mut msg = env.error.message;
+        if let Some(param) = env.error.param {
+            msg.push_str(&format!(" (param={param})"));
+        }
+        if let Some(err_type) = env.error.err_type {
+            msg.push_str(&format!(" [type={err_type}]"));
+        }
+        return msg;
+    }
+    truncate_for_log(body).to_string()
+}
+
+fn truncate_for_log(s: &str) -> &str {
+    if s.len() <= RAW_BODY_LOG_LIMIT {
+        return s;
+    }
+    let mut end = RAW_BODY_LOG_LIMIT;
+    while end > 0 && !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
 }
 
 /// Sends a POST request to the given URL and extracts the text content
 /// from the first choice of the `ChatCompletionsResponse`.
 ///
-/// Returns `Some((content, elapsed))` on success, or `None` if the response
-/// had no choices or the first choice had no content.
+/// Returns `Some((content, elapsed))` on success, `None` if the response
+/// had no choices or the first choice had no content. Returns
+/// `HttpError::Upstream` for any non-2xx status, carrying a message
+/// extracted from the OpenAI-style error envelope (or a truncated raw body
+/// if the body is not in that shape).
 pub async fn post_and_extract_content(
     client: &reqwest::Client,
     url: &str,
@@ -26,17 +83,36 @@ pub async fn post_and_extract_content(
     let start_time = std::time::Instant::now();
 
     let res = client.post(url).headers(headers).body(body).send().await?;
+    let status = res.status();
 
     let body = res.text().await?;
     let elapsed = start_time.elapsed();
 
+    if !status.is_success() {
+        let message = extract_upstream_error_message(&body);
+        warn!(
+            status = status.as_u16(),
+            message = %message,
+            body_size = body.len(),
+            "upstream returned error response"
+        );
+        return Err(HttpError::Upstream {
+            status: status.as_u16(),
+            message,
+        });
+    }
+
     let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
-        warn!(error = %err, body = %body, "failed to parse json response");
+        warn!(
+            error = %err,
+            body = %truncate_for_log(&body),
+            "failed to parse json response",
+        );
         HttpError::Json(err, format!("Failed to parse JSON: {}", body))
     })?;
 
     if response.choices.is_empty() {
-        warn!(body = %body, "no choices in response");
+        warn!(body = %truncate_for_log(&body), "no choices in response");
         return Ok(None);
     }
 
@@ -46,3 +122,52 @@ pub async fn post_and_extract_content(
         .as_ref()
         .map(|c| (c.clone(), elapsed)))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn extracts_message_from_openai_style_error_envelope() {
+        let body = r#"{"error":{"code":400,"message":"This model's maximum context length is 32768 tokens. However, you requested 0 output tokens and your prompt contains at least 32769 input tokens, for a total of at least 32769 tokens.","param":"input_tokens","type":"BadRequestError"}}"#;
+        let msg = extract_upstream_error_message(body);
+        assert!(
+            msg.starts_with("This model's maximum context length is 32768 tokens."),
+            "unexpected message: {msg}"
+        );
+        assert!(msg.contains("(param=input_tokens)"));
+        assert!(msg.contains("[type=BadRequestError]"));
+    }
+
+    #[test]
+    fn extracts_message_without_optional_fields() {
+        let body = r#"{"error":{"message":"something broke"}}"#;
+        let msg = extract_upstream_error_message(body);
+        assert_eq!(msg, "something broke");
+    }
+
+    #[test]
+    fn falls_back_to_raw_body_when_not_error_envelope() {
+        let body = "<html><body>502 Bad Gateway</body></html>";
+        let msg = extract_upstream_error_message(body);
+        assert_eq!(msg, body);
+    }
+
+    #[test]
+    fn truncates_non_envelope_bodies_in_logs() {
+        let body = "x".repeat(RAW_BODY_LOG_LIMIT * 3);
+        let msg = extract_upstream_error_message(&body);
+        assert_eq!(msg.len(), RAW_BODY_LOG_LIMIT);
+    }
+
+    #[test]
+    fn truncate_for_log_respects_utf8_boundaries() {
+        // 2-byte characters; picking a length that would split mid-char.
+        let body = "é".repeat(RAW_BODY_LOG_LIMIT);
+        let out = truncate_for_log(&body);
+        // Should be a valid &str (implicit — would panic if we returned
+        // a non-boundary slice) and at most RAW_BODY_LOG_LIMIT bytes.
+        assert!(out.len() <= RAW_BODY_LOG_LIMIT);
+        assert!(out.chars().all(|c| c == 'é'));
+    }
+}