mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
fixed mixed inputs from openai v1/responses api (#632)
* fixed mixed inputs from openai v1/responses api * removing tracing from model-alias-routing * handling additional input types from openairs --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
a79f55f313
commit
33e90dd338
4 changed files with 135 additions and 51 deletions
|
|
@ -113,16 +113,29 @@ pub struct ResponsesAPIRequest {
|
|||
pub enum InputParam {
|
||||
/// Simple text input
|
||||
Text(String),
|
||||
/// Array of input items
|
||||
/// Array of input items (messages, references, outputs, etc.)
|
||||
Items(Vec<InputItem>),
|
||||
}
|
||||
|
||||
/// Input item discriminated by type
|
||||
/// Input item - can be a message, item reference, function call output, etc.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
#[serde(untagged)]
|
||||
pub enum InputItem {
|
||||
/// Input message
|
||||
/// Input message (role + content)
|
||||
Message(InputMessage),
|
||||
/// Item reference
|
||||
ItemReference {
|
||||
#[serde(rename = "type")]
|
||||
item_type: String,
|
||||
id: String,
|
||||
},
|
||||
/// Function call output
|
||||
FunctionCallOutput {
|
||||
#[serde(rename = "type")]
|
||||
item_type: String,
|
||||
call_id: String,
|
||||
output: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Input message with role and content
|
||||
|
|
@ -130,8 +143,18 @@ pub enum InputItem {
|
|||
pub struct InputMessage {
|
||||
/// Message role
|
||||
pub role: MessageRole,
|
||||
/// Message content
|
||||
pub content: Vec<InputContent>,
|
||||
/// Message content - can be a string or array of InputContent
|
||||
pub content: MessageContent,
|
||||
}
|
||||
|
||||
/// Message content - can be either a simple string or array of content items
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum MessageContent {
|
||||
/// Simple text content
|
||||
Text(String),
|
||||
/// Array of content items
|
||||
Items(Vec<InputContent>),
|
||||
}
|
||||
|
||||
/// Message roles
|
||||
|
|
@ -1025,16 +1048,23 @@ impl ProviderRequest for ResponsesAPIRequest {
|
|||
items.iter().fold(String::new(), |acc, item| {
|
||||
match item {
|
||||
InputItem::Message(msg) => {
|
||||
let content_text = msg.content.iter().fold(String::new(), |acc, content| {
|
||||
acc + " " + &match content {
|
||||
InputContent::InputText { text } => text.clone(),
|
||||
InputContent::InputImage { .. } => "[Image]".to_string(),
|
||||
InputContent::InputFile { .. } => "[File]".to_string(),
|
||||
InputContent::InputAudio { .. } => "[Audio]".to_string(),
|
||||
let content_text = match &msg.content {
|
||||
MessageContent::Text(text) => text.clone(),
|
||||
MessageContent::Items(content_items) => {
|
||||
content_items.iter().fold(String::new(), |acc, content| {
|
||||
acc + " " + &match content {
|
||||
InputContent::InputText { text } => text.clone(),
|
||||
InputContent::InputImage { .. } => "[Image]".to_string(),
|
||||
InputContent::InputFile { .. } => "[File]".to_string(),
|
||||
InputContent::InputAudio { .. } => "[Audio]".to_string(),
|
||||
}
|
||||
})
|
||||
}
|
||||
});
|
||||
};
|
||||
acc + " " + &content_text
|
||||
}
|
||||
// Skip non-message items (references, outputs, etc.)
|
||||
_ => acc,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -1048,14 +1078,20 @@ impl ProviderRequest for ResponsesAPIRequest {
|
|||
items.iter().rev().find_map(|item| {
|
||||
match item {
|
||||
InputItem::Message(msg) if matches!(msg.role, MessageRole::User) => {
|
||||
// Extract text from the first text content
|
||||
msg.content.iter().find_map(|content| {
|
||||
match content {
|
||||
InputContent::InputText { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
// Extract text from content
|
||||
match &msg.content {
|
||||
MessageContent::Text(text) => Some(text.clone()),
|
||||
MessageContent::Items(content_items) => {
|
||||
content_items.iter().find_map(|content| {
|
||||
match content {
|
||||
InputContent::InputText { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
// Skip non-message items
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -291,15 +291,43 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
|
|||
MessageRole::Developer => Role::System, // Map developer to system
|
||||
};
|
||||
|
||||
// Convert content blocks
|
||||
let content = if input_msg.content.len() == 1 {
|
||||
// Single content item - check if it's simple text
|
||||
match &input_msg.content[0] {
|
||||
InputContent::InputText { text } => MessageContent::Text(text.clone()),
|
||||
_ => {
|
||||
// Convert to parts for non-text content
|
||||
// Convert content based on MessageContent type
|
||||
let content = match &input_msg.content {
|
||||
crate::apis::openai_responses::MessageContent::Text(text) => {
|
||||
// Simple text content
|
||||
MessageContent::Text(text.clone())
|
||||
}
|
||||
crate::apis::openai_responses::MessageContent::Items(content_items) => {
|
||||
// Check if it's a single text item (can use simple text format)
|
||||
if content_items.len() == 1 {
|
||||
if let InputContent::InputText { text } = &content_items[0] {
|
||||
MessageContent::Text(text.clone())
|
||||
} else {
|
||||
// Single non-text item - use parts format
|
||||
MessageContent::Parts(
|
||||
content_items.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
|
||||
}
|
||||
InputContent::InputImage { image_url, .. } => {
|
||||
Some(crate::apis::openai::ContentPart::ImageUrl {
|
||||
image_url: crate::apis::openai::ImageUrl {
|
||||
url: image_url.clone(),
|
||||
detail: None,
|
||||
}
|
||||
})
|
||||
}
|
||||
InputContent::InputFile { .. } => None, // Skip files for now
|
||||
InputContent::InputAudio { .. } => None, // Skip audio for now
|
||||
})
|
||||
.collect()
|
||||
)
|
||||
}
|
||||
} else {
|
||||
// Multiple content items - convert to parts
|
||||
MessageContent::Parts(
|
||||
input_msg.content.iter()
|
||||
content_items.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
|
||||
|
|
@ -319,27 +347,6 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
|
|||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Multiple content items - convert to parts
|
||||
MessageContent::Parts(
|
||||
input_msg.content.iter()
|
||||
.filter_map(|c| match c {
|
||||
InputContent::InputText { text } => {
|
||||
Some(crate::apis::openai::ContentPart::Text { text: text.clone() })
|
||||
}
|
||||
InputContent::InputImage { image_url, .. } => {
|
||||
Some(crate::apis::openai::ContentPart::ImageUrl {
|
||||
image_url: crate::apis::openai::ImageUrl {
|
||||
url: image_url.clone(),
|
||||
detail: None,
|
||||
}
|
||||
})
|
||||
}
|
||||
InputContent::InputFile { .. } => None, // Skip files for now
|
||||
InputContent::InputAudio { .. } => None, // Skip audio for now
|
||||
})
|
||||
.collect()
|
||||
)
|
||||
};
|
||||
|
||||
converted_messages.push(Message {
|
||||
|
|
@ -350,6 +357,9 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
|
|||
tool_calls: None,
|
||||
});
|
||||
}
|
||||
// Skip non-message items (references, outputs) for now
|
||||
// These would need special handling in chat completions format
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,3 @@ model_aliases:
|
|||
# Alias for grok testing
|
||||
arch.grok.v1:
|
||||
target: grok-4-0709
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -628,3 +628,44 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
|
|||
assert (
|
||||
full_text or tool_calls
|
||||
), "Expected streamed text or tool call argument deltas from Responses tools stream"
|
||||
|
||||
|
||||
def test_openai_responses_api_mixed_content_types():
|
||||
"""Test Responses API with mixed content types (string and array) in input messages"""
|
||||
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
||||
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
||||
|
||||
# This test mimics the request that was failing:
|
||||
# One message with string content, another with array content
|
||||
resp = client.responses.create(
|
||||
model="arch.title.v1",
|
||||
input=[
|
||||
{
|
||||
"role": "developer",
|
||||
"content": "Generate a very short chat title (2-5 words max) based on the user's message.\n"
|
||||
"Rules:\n"
|
||||
"- Maximum 30 characters\n"
|
||||
"- No quotes, colons, hashtags, or markdown\n"
|
||||
"- Just the topic/intent, not a full sentence\n"
|
||||
'- If the message is a greeting like "hi" or "hello", respond with just "New conversation"\n'
|
||||
'- Be concise: "Weather in NYC" not "User asking about the weather in New York City"',
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "input_text", "text": "What is the weather in Seattle"}
|
||||
],
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
# Print the response
|
||||
print(f"\n{'='*80}")
|
||||
print(f"Model: {resp.model}")
|
||||
print(f"Output: {resp.output_text}")
|
||||
print(f"{'='*80}\n")
|
||||
|
||||
assert resp is not None
|
||||
assert resp.id is not None
|
||||
# Verify we got a reasonable title
|
||||
assert len(resp.output_text) > 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue