fix(hermesllm): preserve output_text for Responses API multi-turn (#978)

This commit is contained in:
Musa 2026-06-29 13:58:35 -07:00 committed by GitHub
parent bb4008f737
commit 474b74aa18
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 104 additions and 36 deletions

View file

@ -183,9 +183,13 @@ pub enum MessageRole {
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InputContent {
/// Text input
#[serde(rename = "input_text", alias = "text", alias = "output_text")]
/// Text input (input-role message content)
#[serde(rename = "input_text", alias = "text")]
InputText { text: String },
/// Text produced by the model in a prior turn. This must round-trip as
/// `output_text` because the Responses API rejects `input_text` for
/// output-role (assistant) message content.
OutputText { text: String },
/// Image input via URL
InputImage {
image_url: String,
@ -1051,6 +1055,7 @@ pub struct ListInputItemsResponse {
fn append_input_content_text(buffer: &mut String, content: &InputContent) {
match content {
InputContent::InputText { text } => buffer.push_str(text),
InputContent::OutputText { text } => buffer.push_str(text),
InputContent::InputImage { .. } => buffer.push_str("[Image]"),
InputContent::InputFile { .. } => buffer.push_str("[File]"),
InputContent::InputAudio { .. } => buffer.push_str("[Audio]"),
@ -1642,6 +1647,62 @@ mod tests {
}
}
#[test]
fn test_input_content_preserves_output_text_round_trip() {
    // Build a two-turn conversation: the user turn uses `input_text`, while the
    // earlier assistant turn uses `output_text`. The assistant tag has to come
    // back out of serialization unchanged rather than being rewritten to
    // `input_text`.
    let payload = json!({
        "model": "gpt-5.3-codex",
        "input": [
            {
                "role": "user",
                "content": [
                    { "type": "input_text", "text": "hello" }
                ]
            },
            {
                "role": "assistant",
                "content": [
                    { "type": "output_text", "text": "hi there" }
                ]
            }
        ]
    });
    let raw = serde_json::to_vec(&payload).unwrap();
    let request = ResponsesAPIRequest::try_from(raw.as_slice()).unwrap();

    // The `input` field must have been parsed as an item list with both turns.
    let items = if let InputParam::Items(items) = &request.input {
        items
    } else {
        panic!("expected array input")
    };
    assert_eq!(items.len(), 2);

    // Pick out the first message whose role is assistant.
    let assistant = items
        .iter()
        .filter_map(|item| match item {
            InputItem::Message(msg) => Some(msg),
            _ => None,
        })
        .find(|msg| matches!(msg.role, MessageRole::Assistant))
        .expect("assistant message present");

    // Its first content part has to land in the dedicated OutputText variant.
    if let MessageContent::Items(parts) = &assistant.content {
        assert!(matches!(parts[0], InputContent::OutputText { .. }));
    } else {
        panic!("expected array content");
    }

    // Serialize the parsed request again and verify each turn kept its own
    // type tag: index 0 (user) -> input_text, index 1 (assistant) -> output_text.
    let round_tripped = serde_json::to_value(&request).unwrap();
    let expected_tags = ["input_text", "output_text"];
    for (turn, tag) in expected_tags.iter().enumerate() {
        assert_eq!(round_tripped["input"][turn]["content"][0]["type"], *tag);
    }
}
#[test]
fn test_request_deserializes_text_config_without_format() {
let request = json!({

View file

@ -112,33 +112,37 @@ impl TryFrom<ResponsesInputConverter> for Vec<Message> {
) => {
// Check if it's a single text item (can use simple text format)
if content_items.len() == 1 {
if let InputContent::InputText { text } = &content_items[0]
{
MessageContent::Text(text.clone())
} else {
// Single non-text item - use parts format
MessageContent::Parts(
content_items
.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text {
text: text.clone(),
})
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
},
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect(),
)
match &content_items[0] {
InputContent::InputText { text }
| InputContent::OutputText { text } => {
MessageContent::Text(text.clone())
}
_ => {
// Single non-text item - use parts format
MessageContent::Parts(
content_items
.iter()
.filter_map(|c| match c {
InputContent::InputText { text }
| InputContent::OutputText { text } => {
Some(crate::apis::openai::ContentPart::Text {
text: text.clone(),
})
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
},
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect(),
)
}
}
} else {
// Multiple content items - convert to parts
@ -146,7 +150,8 @@ impl TryFrom<ResponsesInputConverter> for Vec<Message> {
content_items
.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
InputContent::InputText { text }
| InputContent::OutputText { text } => {
Some(crate::apis::openai::ContentPart::Text {
text: text.clone(),
})

View file

@ -18,7 +18,9 @@ pub fn convert_responses_output_to_input_items(output: &OutputItem) -> Option<In
.iter()
.filter_map(|c| match c {
OutputContent::OutputText { text, .. } => {
Some(InputContent::InputText { text: text.clone() })
// Assistant (output-role) content must round-trip as
// output_text; the Responses API rejects input_text here.
Some(InputContent::OutputText { text: text.clone() })
}
OutputContent::OutputAudio { data, .. } => Some(InputContent::InputAudio {
data: data.clone(),
@ -59,7 +61,7 @@ pub fn convert_responses_output_to_input_items(output: &OutputItem) -> Option<In
Some(InputItem::Message(InputMessage {
role: MessageRole::Assistant,
content: MessageContent::Items(vec![InputContent::InputText {
content: MessageContent::Items(vec![InputContent::OutputText {
text: tool_call_text,
}]),
}))
@ -104,8 +106,8 @@ mod tests {
MessageContent::Items(items) => {
assert_eq!(items.len(), 1);
match &items[0] {
InputContent::InputText { text } => assert_eq!(text, "Hello!"),
_ => panic!("Expected InputText"),
InputContent::OutputText { text } => assert_eq!(text, "Hello!"),
_ => panic!("Expected OutputText"),
}
}
_ => panic!("Expected MessageContent::Items"),
@ -132,10 +134,10 @@ mod tests {
assert!(matches!(msg.role, MessageRole::Assistant));
match &msg.content {
MessageContent::Items(items) => match &items[0] {
InputContent::InputText { text } => {
InputContent::OutputText { text } => {
assert!(text.contains("get_weather"));
}
_ => panic!("Expected InputText"),
_ => panic!("Expected OutputText"),
},
_ => panic!("Expected MessageContent::Items"),
}