mirror of
https://github.com/katanemo/plano.git
synced 2026-07-02 15:51:02 +02:00
fix(hermesllm): preserve output_text for Responses API multi-turn (#978)
This commit is contained in:
parent
bb4008f737
commit
474b74aa18
3 changed files with 104 additions and 36 deletions
|
|
@ -183,9 +183,13 @@ pub enum MessageRole {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum InputContent {
|
||||
/// Text input
|
||||
#[serde(rename = "input_text", alias = "text", alias = "output_text")]
|
||||
/// Text input (input-role message content)
|
||||
#[serde(rename = "input_text", alias = "text")]
|
||||
InputText { text: String },
|
||||
/// Text produced by the model in a prior turn. This must round-trip as
|
||||
/// `output_text` because the Responses API rejects `input_text` for
|
||||
/// output-role (assistant) message content.
|
||||
OutputText { text: String },
|
||||
/// Image input via URL
|
||||
InputImage {
|
||||
image_url: String,
|
||||
|
|
@ -1051,6 +1055,7 @@ pub struct ListInputItemsResponse {
|
|||
fn append_input_content_text(buffer: &mut String, content: &InputContent) {
|
||||
match content {
|
||||
InputContent::InputText { text } => buffer.push_str(text),
|
||||
InputContent::OutputText { text } => buffer.push_str(text),
|
||||
InputContent::InputImage { .. } => buffer.push_str("[Image]"),
|
||||
InputContent::InputFile { .. } => buffer.push_str("[File]"),
|
||||
InputContent::InputAudio { .. } => buffer.push_str("[Audio]"),
|
||||
|
|
@ -1642,6 +1647,62 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_input_content_preserves_output_text_round_trip() {
    // Scenario: a two-turn conversation where the user turn uses `input_text`
    // and the earlier assistant turn uses `output_text`. Because the Responses
    // API refuses `input_text` on output-role content, the assistant part has
    // to keep its `output_text` tag through parse + re-serialize.
    let payload = json!({
        "model": "gpt-5.3-codex",
        "input": [
            {
                "role": "user",
                "content": [{ "type": "input_text", "text": "hello" }]
            },
            {
                "role": "assistant",
                "content": [{ "type": "output_text", "text": "hi there" }]
            }
        ]
    });

    let raw = serde_json::to_vec(&payload).unwrap();
    let parsed = ResponsesAPIRequest::try_from(raw.as_slice()).unwrap();

    // The request input must come back as a list holding both turns.
    let entries = if let InputParam::Items(list) = &parsed.input {
        list
    } else {
        panic!("expected array input")
    };
    assert_eq!(entries.len(), 2);

    // Locate the first assistant-role message among the parsed items.
    let mut assistant_msg = None;
    for entry in entries.iter() {
        if let InputItem::Message(message) = entry {
            if matches!(message.role, MessageRole::Assistant) {
                assistant_msg = Some(message);
                break;
            }
        }
    }
    let assistant_msg = assistant_msg.expect("assistant message present");

    // Its first content part has to land in the dedicated OutputText variant.
    if let MessageContent::Items(parts) = &assistant_msg.content {
        assert!(matches!(parts[0], InputContent::OutputText { .. }));
    } else {
        panic!("expected array content");
    }

    // Re-serialize and check that each turn kept its original type tag:
    // index 0 (user) -> input_text, index 1 (assistant) -> output_text.
    let round_tripped = serde_json::to_value(&parsed).unwrap();
    for (idx, expected_tag) in ["input_text", "output_text"].iter().enumerate() {
        assert_eq!(
            round_tripped["input"][idx]["content"][0]["type"],
            *expected_tag
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_request_deserializes_text_config_without_format() {
|
||||
let request = json!({
|
||||
|
|
|
|||
|
|
@ -112,33 +112,37 @@ impl TryFrom<ResponsesInputConverter> for Vec<Message> {
|
|||
) => {
|
||||
// Check if it's a single text item (can use simple text format)
|
||||
if content_items.len() == 1 {
|
||||
if let InputContent::InputText { text } = &content_items[0]
|
||||
{
|
||||
MessageContent::Text(text.clone())
|
||||
} else {
|
||||
// Single non-text item - use parts format
|
||||
MessageContent::Parts(
|
||||
content_items
|
||||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text {
|
||||
text: text.clone(),
|
||||
})
|
||||
}
|
||||
InputContent::InputImage { image_url, .. } => {
|
||||
Some(crate::apis::openai::ContentPart::ImageUrl {
|
||||
image_url: crate::apis::openai::ImageUrl {
|
||||
url: image_url.clone(),
|
||||
detail: None,
|
||||
},
|
||||
})
|
||||
}
|
||||
InputContent::InputFile { .. } => None, // Skip files for now
|
||||
InputContent::InputAudio { .. } => None, // Skip audio for now
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
match &content_items[0] {
|
||||
InputContent::InputText { text }
|
||||
| InputContent::OutputText { text } => {
|
||||
MessageContent::Text(text.clone())
|
||||
}
|
||||
_ => {
|
||||
// Single non-text item - use parts format
|
||||
MessageContent::Parts(
|
||||
content_items
|
||||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text }
|
||||
| InputContent::OutputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text {
|
||||
text: text.clone(),
|
||||
})
|
||||
}
|
||||
InputContent::InputImage { image_url, .. } => {
|
||||
Some(crate::apis::openai::ContentPart::ImageUrl {
|
||||
image_url: crate::apis::openai::ImageUrl {
|
||||
url: image_url.clone(),
|
||||
detail: None,
|
||||
},
|
||||
})
|
||||
}
|
||||
InputContent::InputFile { .. } => None, // Skip files for now
|
||||
InputContent::InputAudio { .. } => None, // Skip audio for now
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Multiple content items - convert to parts
|
||||
|
|
@ -146,7 +150,8 @@ impl TryFrom<ResponsesInputConverter> for Vec<Message> {
|
|||
content_items
|
||||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text } => {
|
||||
InputContent::InputText { text }
|
||||
| InputContent::OutputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text {
|
||||
text: text.clone(),
|
||||
})
|
||||
|
|
|
|||
|
|
@ -18,7 +18,9 @@ pub fn convert_responses_output_to_input_items(output: &OutputItem) -> Option<In
|
|||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
OutputContent::OutputText { text, .. } => {
|
||||
Some(InputContent::InputText { text: text.clone() })
|
||||
// Assistant (output-role) content must round-trip as
|
||||
// output_text; the Responses API rejects input_text here.
|
||||
Some(InputContent::OutputText { text: text.clone() })
|
||||
}
|
||||
OutputContent::OutputAudio { data, .. } => Some(InputContent::InputAudio {
|
||||
data: data.clone(),
|
||||
|
|
@ -59,7 +61,7 @@ pub fn convert_responses_output_to_input_items(output: &OutputItem) -> Option<In
|
|||
|
||||
Some(InputItem::Message(InputMessage {
|
||||
role: MessageRole::Assistant,
|
||||
content: MessageContent::Items(vec![InputContent::InputText {
|
||||
content: MessageContent::Items(vec![InputContent::OutputText {
|
||||
text: tool_call_text,
|
||||
}]),
|
||||
}))
|
||||
|
|
@ -104,8 +106,8 @@ mod tests {
|
|||
MessageContent::Items(items) => {
|
||||
assert_eq!(items.len(), 1);
|
||||
match &items[0] {
|
||||
InputContent::InputText { text } => assert_eq!(text, "Hello!"),
|
||||
_ => panic!("Expected InputText"),
|
||||
InputContent::OutputText { text } => assert_eq!(text, "Hello!"),
|
||||
_ => panic!("Expected OutputText"),
|
||||
}
|
||||
}
|
||||
_ => panic!("Expected MessageContent::Items"),
|
||||
|
|
@ -132,10 +134,10 @@ mod tests {
|
|||
assert!(matches!(msg.role, MessageRole::Assistant));
|
||||
match &msg.content {
|
||||
MessageContent::Items(items) => match &items[0] {
|
||||
InputContent::InputText { text } => {
|
||||
InputContent::OutputText { text } => {
|
||||
assert!(text.contains("get_weather"));
|
||||
}
|
||||
_ => panic!("Expected InputText"),
|
||||
_ => panic!("Expected OutputText"),
|
||||
},
|
||||
_ => panic!("Expected MessageContent::Items"),
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue