fix(router): harden routing response parsing

Make router HTTP parsing tolerant of chat-completions-like payloads and accept plain-text route outputs so model selection remains stable across affinity-pinned loops.
This commit is contained in:
Spherrrical 2026-04-09 10:12:19 -07:00
parent 8dedf0bec1
commit fbc247ab05
2 changed files with 65 additions and 7 deletions

View file

@ -1,5 +1,5 @@
use hermesllm::apis::openai::ChatCompletionsResponse;
use hyper::header;
use serde::Deserialize;
use thiserror::Error;
use tracing::warn;
@ -12,8 +12,23 @@ pub enum HttpError {
Json(serde_json::Error, String),
}
/// Minimal view of a chat-completions-like response body returned by the
/// router endpoint.
///
/// Only the `choices` array is declared here; no `deny_unknown_fields`
/// attribute is present, so any other keys in the payload are not required
/// for parsing to succeed.
#[derive(Debug, Deserialize)]
struct RouterChatCompletionResponse {
/// Candidate completions in the order they appear in the response body.
choices: Vec<RouterChoice>,
}
/// A single entry of the `choices` array in a chat-completions-like response.
///
/// Only the nested `message` object is declared.
#[derive(Debug, Deserialize)]
struct RouterChoice {
/// The message object carried by this choice.
message: RouterMessage,
}
/// The `message` object of a choice, reduced to its text content.
#[derive(Debug, Deserialize)]
struct RouterMessage {
/// Text produced by the model; `None` when the payload carries no content
/// (the field is optional, so a missing or `null` value does not fail parsing).
content: Option<String>,
}
/// Sends a POST request to the given URL and extracts the text content
/// from the first choice of the `ChatCompletionsResponse`.
/// from the first choice of a chat-completions-like response.
///
/// Returns `Some((content, elapsed))` on success, or `None` if the response
/// had no choices or the first choice had no content.
@ -30,7 +45,7 @@ pub async fn post_and_extract_content(
let body = res.text().await?;
let elapsed = start_time.elapsed();
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
let response: RouterChatCompletionResponse = serde_json::from_str(&body).map_err(|err| {
warn!(error = %err, body = %body, "failed to parse json response");
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
})?;

View file

@ -181,9 +181,7 @@ impl RouterModel for RouterModelV1 {
return Ok(None);
}
let router_resp_fixed = fix_json_response(content);
let router_response: LlmRouterResponse = serde_json::from_str(router_resp_fixed.as_str())?;
let selected_route = router_response.route.unwrap_or_default().to_string();
let selected_route = parse_selected_route(&router_resp_fixed)?;
if selected_route.is_empty() || selected_route == "other" {
return Ok(None);
@ -264,7 +262,7 @@ fn convert_to_router_preferences(
}
fn fix_json_response(body: &str) -> String {
let mut updated_body = body.to_string();
let mut updated_body = body.trim().to_string();
updated_body = updated_body.replace("'", "\"");
@ -286,9 +284,38 @@ fn fix_json_response(body: &str) -> String {
.to_string();
}
updated_body = updated_body.trim().to_string();
updated_body
}
/// Extracts the selected route name from the router model's text output.
///
/// Two input shapes are accepted:
///
/// * JSON-shaped output (starts with `{`, `[`, `"` or a code fence) is parsed
///   strictly as an `LlmRouterResponse`; its `route` field is returned, or an
///   empty string when that field is absent.
/// * Plain text is returned as the route name, with an optional
///   case-insensitive `route:` prefix removed and surrounding double quotes
///   and whitespace stripped, so `route: "Image generation"` yields
///   `Image generation`.
///
/// Empty or whitespace-only input yields an empty string.
///
/// # Errors
///
/// Returns the JSON deserialization error when the input looks like JSON but
/// cannot be parsed as an `LlmRouterResponse`.
fn parse_selected_route(content: &str) -> Result<String> {
    // Trim up front so leading whitespace cannot defeat the JSON sniffing
    // below and cause a JSON payload to be returned verbatim as a route name.
    let content = content.trim();
    if content.is_empty() {
        return Ok(String::new());
    }

    // If output is JSON-shaped, preserve strict JSON parsing behavior.
    let looks_like_json = content.starts_with('{')
        || content.starts_with('[')
        || content.starts_with('"')
        || content.starts_with("```");
    if looks_like_json {
        let router_response: LlmRouterResponse = serde_json::from_str(content)?;
        return Ok(router_response.route.unwrap_or_default());
    }

    // Accept common plain-text formats from routing LLMs. Quotes are stripped
    // from the extracted value rather than from the whole string, so a quoted
    // value after a `route:` prefix does not keep a stray leading quote.
    let unquote = |text: &str| text.trim().trim_matches('"').trim().to_string();
    if let Some((key, value)) = content.split_once(':') {
        if key.trim().eq_ignore_ascii_case("route") {
            return Ok(unquote(value));
        }
    }
    Ok(unquote(content))
}
impl std::fmt::Debug for dyn RouterModel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "RouterModel")
@ -838,5 +865,21 @@ Based on your analysis, provide your response in the following JSON formats if y
result,
Some(("Image generation".to_string(), "gpt-4o".to_string()))
);
// Case 8: Plain text route response
let input = "Image generation";
let result = router.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(("Image generation".to_string(), "gpt-4o".to_string()))
);
// Case 9: Plain text with route prefix
let input = "route: Image generation";
let result = router.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(("Image generation".to_string(), "gpt-4o".to_string()))
);
}
}