mirror of
https://github.com/katanemo/plano.git
synced 2026-06-02 14:35:14 +02:00
fix: surface real upstream error messages from orchestrator HTTP client
`post_and_extract_content` was unconditionally deserializing the upstream
response body as a `ChatCompletionsResponse`, which meant 4xx/5xx error
bodies (OpenAI-style `{"error": {...}}` envelopes) failed with confusing
messages like `missing field 'id' at line 1 column 391`. The real
upstream message (e.g. "This model's maximum context length is 32768
tokens...") only appeared once as a warn log and then got buried in the
generic "Failed to parse JSON response" path.
Now we:
- Check the HTTP status before attempting to parse the success body.
- On non-2xx, extract a human-readable message from the OpenAI-style
error envelope (or fall back to a UTF-8-safe truncated raw body).
- Return a dedicated `HttpError::Upstream { status, message }` variant
so callers can log / surface / retry based on the real status code.
- Truncate raw bodies in warn logs to 512 bytes (UTF-8-safe) to avoid
flooding logs with oversized JSON or HTML error pages.
This commit is contained in:
parent
321c28da37
commit
c90b699c90
1 changed files with 129 additions and 4 deletions
|
|
@ -1,8 +1,14 @@
|
||||||
use hermesllm::apis::openai::ChatCompletionsResponse;
|
use hermesllm::apis::openai::ChatCompletionsResponse;
|
||||||
use hyper::header;
|
use hyper::header;
|
||||||
|
use serde::Deserialize;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
|
/// Max bytes of raw upstream body kept by `truncate_for_log`. Applies to the
|
||||||
|
/// `HttpError::Upstream` message when an error body is not a recognizable
|
||||||
|
/// envelope, and to warn logs of 2xx bodies that fail to parse or have no
/// choices. Keeps logs from being flooded by huge JSON or HTML error pages.
|
||||||
|
const RAW_BODY_LOG_LIMIT: usize = 512;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum HttpError {
|
pub enum HttpError {
|
||||||
#[error("Failed to send request: {0}")]
|
#[error("Failed to send request: {0}")]
|
||||||
|
|
@ -10,13 +16,64 @@ pub enum HttpError {
|
||||||
|
|
||||||
#[error("Failed to parse JSON response: {0}")]
|
#[error("Failed to parse JSON response: {0}")]
|
||||||
Json(serde_json::Error, String),
|
Json(serde_json::Error, String),
|
||||||
|
|
||||||
|
#[error("Upstream returned {status}: {message}")]
|
||||||
|
Upstream { status: u16, message: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Top level of an OpenAI-style error response body, e.g.
|
||||||
|
/// `{"error": {"message": "...", "type": "...", "param": "...", "code": ...}}`.
/// Deserialized by `extract_upstream_error_message`; a body that does not
/// match this shape makes that function fall back to the truncated raw text.
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct UpstreamErrorEnvelope {
|
||||||
|
/// The nested `error` object carrying the message and optional details.
error: UpstreamErrorBody,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Contents of the `error` object inside an OpenAI-style error envelope.
/// Only `message` is required; keys not declared here (such as `code`) are
/// not read.
#[derive(Debug, Deserialize)]
|
||||||
|
struct UpstreamErrorBody {
|
||||||
|
/// Human-readable description of the failure. The envelope does not
/// deserialize without it.
message: String,
|
||||||
|
/// Optional `type` value from the envelope, appended to the extracted
/// message as `[type=...]`. Renamed because `type` is a reserved word in Rust.
#[serde(default, rename = "type")]
|
||||||
|
err_type: Option<String>,
|
||||||
|
/// Optional `param` value from the envelope, appended to the extracted
/// message as `(param=...)`.
#[serde(default)]
|
||||||
|
param: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract a human-readable error message from an upstream response body.
|
||||||
|
/// Tries to parse an OpenAI-style `{"error": {"message": ...}}` envelope; if
|
||||||
|
/// that fails, falls back to the first `RAW_BODY_LOG_LIMIT` bytes of the raw
|
||||||
|
/// body (UTF-8 safe).
|
||||||
|
fn extract_upstream_error_message(body: &str) -> String {
|
||||||
|
if let Ok(env) = serde_json::from_str::<UpstreamErrorEnvelope>(body) {
|
||||||
|
let mut msg = env.error.message;
|
||||||
|
if let Some(param) = env.error.param {
|
||||||
|
msg.push_str(&format!(" (param={param})"));
|
||||||
|
}
|
||||||
|
if let Some(err_type) = env.error.err_type {
|
||||||
|
msg.push_str(&format!(" [type={err_type}]"));
|
||||||
|
}
|
||||||
|
return msg;
|
||||||
|
}
|
||||||
|
truncate_for_log(body).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn truncate_for_log(s: &str) -> &str {
|
||||||
|
if s.len() <= RAW_BODY_LOG_LIMIT {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
let mut end = RAW_BODY_LOG_LIMIT;
|
||||||
|
while end > 0 && !s.is_char_boundary(end) {
|
||||||
|
end -= 1;
|
||||||
|
}
|
||||||
|
&s[..end]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sends a POST request to the given URL and extracts the text content
|
/// Sends a POST request to the given URL and extracts the text content
|
||||||
/// from the first choice of the `ChatCompletionsResponse`.
|
/// from the first choice of the `ChatCompletionsResponse`.
|
||||||
///
|
///
|
||||||
/// Returns `Some((content, elapsed))` on success, or `None` if the response
|
/// Returns `Some((content, elapsed))` on success, `None` if the response
|
||||||
/// had no choices or the first choice had no content.
|
/// had no choices or the first choice had no content. Returns
|
||||||
|
/// `HttpError::Upstream` for any non-2xx status, carrying a message
|
||||||
|
/// extracted from the OpenAI-style error envelope (or a truncated raw body
|
||||||
|
/// if the body is not in that shape).
|
||||||
pub async fn post_and_extract_content(
|
pub async fn post_and_extract_content(
|
||||||
client: &reqwest::Client,
|
client: &reqwest::Client,
|
||||||
url: &str,
|
url: &str,
|
||||||
|
|
@ -26,17 +83,36 @@ pub async fn post_and_extract_content(
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
|
|
||||||
let res = client.post(url).headers(headers).body(body).send().await?;
|
let res = client.post(url).headers(headers).body(body).send().await?;
|
||||||
|
let status = res.status();
|
||||||
|
|
||||||
let body = res.text().await?;
|
let body = res.text().await?;
|
||||||
let elapsed = start_time.elapsed();
|
let elapsed = start_time.elapsed();
|
||||||
|
|
||||||
|
if !status.is_success() {
|
||||||
|
let message = extract_upstream_error_message(&body);
|
||||||
|
warn!(
|
||||||
|
status = status.as_u16(),
|
||||||
|
message = %message,
|
||||||
|
body_size = body.len(),
|
||||||
|
"upstream returned error response"
|
||||||
|
);
|
||||||
|
return Err(HttpError::Upstream {
|
||||||
|
status: status.as_u16(),
|
||||||
|
message,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
|
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
|
||||||
warn!(error = %err, body = %body, "failed to parse json response");
|
warn!(
|
||||||
|
error = %err,
|
||||||
|
body = %truncate_for_log(&body),
|
||||||
|
"failed to parse json response",
|
||||||
|
);
|
||||||
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
|
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if response.choices.is_empty() {
|
if response.choices.is_empty() {
|
||||||
warn!(body = %body, "no choices in response");
|
warn!(body = %truncate_for_log(&body), "no choices in response");
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,3 +122,52 @@ pub async fn post_and_extract_content(
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|c| (c.clone(), elapsed)))
|
.map(|c| (c.clone(), elapsed)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A full OpenAI-style envelope yields the message plus the `param` and
    /// `type` suffixes.
    #[test]
    fn extracts_message_from_openai_style_error_envelope() {
        let body = r#"{"error":{"code":400,"message":"This model's maximum context length is 32768 tokens. However, you requested 0 output tokens and your prompt contains at least 32769 input tokens, for a total of at least 32769 tokens.","param":"input_tokens","type":"BadRequestError"}}"#;
        let msg = extract_upstream_error_message(body);
        assert!(
            msg.starts_with("This model's maximum context length is 32768 tokens."),
            "unexpected message: {msg}"
        );
        assert!(msg.contains("(param=input_tokens)"));
        assert!(msg.contains("[type=BadRequestError]"));
    }

    /// An envelope carrying only `message` yields exactly that message.
    #[test]
    fn extracts_message_without_optional_fields() {
        let body = r#"{"error":{"message":"something broke"}}"#;
        let msg = extract_upstream_error_message(body);
        assert_eq!(msg, "something broke");
    }

    /// Short bodies that are not an error envelope are passed through verbatim.
    #[test]
    fn falls_back_to_raw_body_when_not_error_envelope() {
        let body = "<html><body>502 Bad Gateway</body></html>";
        let msg = extract_upstream_error_message(body);
        assert_eq!(msg, body);
    }

    /// Oversized non-envelope bodies are cut down to the log limit.
    #[test]
    fn truncates_non_envelope_bodies_in_logs() {
        let body = "x".repeat(RAW_BODY_LOG_LIMIT * 3);
        let msg = extract_upstream_error_message(&body);
        assert_eq!(msg.len(), RAW_BODY_LOG_LIMIT);
    }

    /// Truncation must never cut through a multi-byte character.
    #[test]
    fn truncate_for_log_respects_utf8_boundaries() {
        // `é` is 2 bytes. Without a prefix the characters start on even
        // offsets; with a 1-byte prefix they start on odd offsets. Whatever
        // the parity of RAW_BODY_LOG_LIMIT, one of the two bodies has the
        // limit landing in the middle of a character.
        let aligned = "é".repeat(RAW_BODY_LOG_LIMIT);
        let shifted = format!("a{}", "é".repeat(RAW_BODY_LOG_LIMIT));
        assert!(
            !aligned.is_char_boundary(RAW_BODY_LOG_LIMIT)
                || !shifted.is_char_boundary(RAW_BODY_LOG_LIMIT),
            "neither fixture places the limit inside a character"
        );

        for body in [aligned.as_str(), shifted.as_str()] {
            let out = truncate_for_log(body);
            // The result is a prefix of the input ending on a char boundary...
            assert!(body.starts_with(out));
            assert!(body.is_char_boundary(out.len()));
            // ...that backs off by at most one byte for 2-byte characters.
            assert!(out.len() <= RAW_BODY_LOG_LIMIT);
            assert!(out.len() + 1 >= RAW_BODY_LOG_LIMIT);
            assert!(out.chars().all(|c| c == 'é' || c == 'a'));
        }
    }
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue