fixed mixed inputs from openai v1/responses api (#632)

* fixed mixed inputs from openai v1/responses api

* removing tracing from model-alias-routing

* handling additional input types from openairs

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-12-16 13:39:13 -08:00 committed by GitHub
parent a79f55f313
commit 33e90dd338
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 135 additions and 51 deletions

View file

@ -113,16 +113,29 @@ pub struct ResponsesAPIRequest {
pub enum InputParam {
/// Simple text input
Text(String),
/// Array of input items
/// Array of input items (messages, references, outputs, etc.)
Items(Vec<InputItem>),
}
/// Input item discriminated by type
/// Input item - can be a message, item reference, function call output, etc.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
#[serde(untagged)]
pub enum InputItem {
/// Input message
/// Input message (role + content)
Message(InputMessage),
/// Item reference
ItemReference {
#[serde(rename = "type")]
item_type: String,
id: String,
},
/// Function call output
FunctionCallOutput {
#[serde(rename = "type")]
item_type: String,
call_id: String,
output: String,
},
}
/// Input message with role and content
@ -130,8 +143,18 @@ pub enum InputItem {
pub struct InputMessage {
/// Message role
pub role: MessageRole,
/// Message content
pub content: Vec<InputContent>,
/// Message content - can be a string or array of InputContent
pub content: MessageContent,
}
/// Message content - can be either a simple string or array of content items.
///
/// Because of `#[serde(untagged)]`, no discriminator field is read or written:
/// serde tries the variants in declaration order and keeps the first one that
/// matches, so a string value deserializes to `Text` and a sequence of content
/// items deserializes to `Items`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MessageContent {
/// Simple text content, supplied as a bare string
Text(String),
/// Array of content items, each one an `InputContent`
Items(Vec<InputContent>),
}
/// Message roles
@ -1025,16 +1048,23 @@ impl ProviderRequest for ResponsesAPIRequest {
items.iter().fold(String::new(), |acc, item| {
match item {
InputItem::Message(msg) => {
let content_text = msg.content.iter().fold(String::new(), |acc, content| {
acc + " " + &match content {
InputContent::InputText { text } => text.clone(),
InputContent::InputImage { .. } => "[Image]".to_string(),
InputContent::InputFile { .. } => "[File]".to_string(),
InputContent::InputAudio { .. } => "[Audio]".to_string(),
let content_text = match &msg.content {
MessageContent::Text(text) => text.clone(),
MessageContent::Items(content_items) => {
content_items.iter().fold(String::new(), |acc, content| {
acc + " " + &match content {
InputContent::InputText { text } => text.clone(),
InputContent::InputImage { .. } => "[Image]".to_string(),
InputContent::InputFile { .. } => "[File]".to_string(),
InputContent::InputAudio { .. } => "[Audio]".to_string(),
}
})
}
});
};
acc + " " + &content_text
}
// Skip non-message items (references, outputs, etc.)
_ => acc,
}
})
}
@ -1048,14 +1078,20 @@ impl ProviderRequest for ResponsesAPIRequest {
items.iter().rev().find_map(|item| {
match item {
InputItem::Message(msg) if matches!(msg.role, MessageRole::User) => {
// Extract text from the first text content
msg.content.iter().find_map(|content| {
match content {
InputContent::InputText { text } => Some(text.clone()),
_ => None,
// Extract text from content
match &msg.content {
MessageContent::Text(text) => Some(text.clone()),
MessageContent::Items(content_items) => {
content_items.iter().find_map(|content| {
match content {
InputContent::InputText { text } => Some(text.clone()),
_ => None,
}
})
}
})
}
}
// Skip non-message items
_ => None,
}
})

View file

@ -291,15 +291,43 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
MessageRole::Developer => Role::System, // Map developer to system
};
// Convert content blocks
let content = if input_msg.content.len() == 1 {
// Single content item - check if it's simple text
match &input_msg.content[0] {
InputContent::InputText { text } => MessageContent::Text(text.clone()),
_ => {
// Convert to parts for non-text content
// Convert content based on MessageContent type
let content = match &input_msg.content {
crate::apis::openai_responses::MessageContent::Text(text) => {
// Simple text content
MessageContent::Text(text.clone())
}
crate::apis::openai_responses::MessageContent::Items(content_items) => {
// Check if it's a single text item (can use simple text format)
if content_items.len() == 1 {
if let InputContent::InputText { text } = &content_items[0] {
MessageContent::Text(text.clone())
} else {
// Single non-text item - use parts format
MessageContent::Parts(
content_items.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
}
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect()
)
}
} else {
// Multiple content items - convert to parts
MessageContent::Parts(
input_msg.content.iter()
content_items.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
@ -319,27 +347,6 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
)
}
}
} else {
// Multiple content items - convert to parts
MessageContent::Parts(
input_msg.content.iter()
.filter_map(|c| match c {
InputContent::InputText { text } => {
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
}
InputContent::InputImage { image_url, .. } => {
Some(crate::apis::openai::ContentPart::ImageUrl {
image_url: crate::apis::openai::ImageUrl {
url: image_url.clone(),
detail: None,
}
})
}
InputContent::InputFile { .. } => None, // Skip files for now
InputContent::InputAudio { .. } => None, // Skip audio for now
})
.collect()
)
};
converted_messages.push(Message {
@ -350,6 +357,9 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
tool_calls: None,
});
}
// Skip non-message items (references, outputs) for now
// These would need special handling in chat completions format
_ => {}
}
}

View file

@ -89,6 +89,3 @@ model_aliases:
# Alias for grok testing
arch.grok.v1:
target: grok-4-0709
tracing:
random_sampling: 100

View file

@ -628,3 +628,44 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
assert (
full_text or tool_calls
), "Expected streamed text or tool call argument deltas from Responses tools stream"
def test_openai_responses_api_mixed_content_types():
    """Responses API request whose input messages mix string content and array content.

    Mirrors a request that used to fail: the developer message carries its
    content as a plain string, while the user message carries a list of typed
    content parts.
    """
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")

    # Developer instructions are sent as one plain string.
    title_rules = (
        "Generate a very short chat title (2-5 words max) based on the user's message.\n"
        "Rules:\n"
        "- Maximum 30 characters\n"
        "- No quotes, colons, hashtags, or markdown\n"
        "- Just the topic/intent, not a full sentence\n"
        '- If the message is a greeting like "hi" or "hello", respond with just "New conversation"\n'
        '- Be concise: "Weather in NYC" not "User asking about the weather in New York City"'
    )
    developer_message = {"role": "developer", "content": title_rules}

    # The user turn is sent as an array of content parts.
    user_message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "What is the weather in Seattle"}
        ],
    }

    resp = client.responses.create(
        model="arch.title.v1",
        input=[developer_message, user_message],
    )

    # Dump the response between two 80-character rules for easier debugging.
    rule = "=" * 80
    print(f"\n{rule}")
    print(f"Model: {resp.model}")
    print(f"Output: {resp.output_text}")
    print(f"{rule}\n")

    assert resp is not None
    assert resp.id is not None
    # A non-empty title is the only content check made here.
    assert len(resp.output_text) > 0