Implemented a request adapter for ChatGPT Codex subscription endpoints, which do not match the standard OpenAI ones.

Made-with: Cursor
This commit is contained in:
Tom Stoffer 2026-04-15 20:32:17 +12:00
parent 843903c8bc
commit d8588521fa
8 changed files with 574 additions and 30 deletions

View file

@ -5,7 +5,7 @@ use common::llm_providers::LlmProviders;
use hermesllm::apis::openai::Message;
use hermesllm::apis::openai_responses::InputParam;
use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
use hermesllm::{ProviderRequest, ProviderRequestType};
use hermesllm::{serialize_for_upstream, ProviderRequest, ProviderRequestType};
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::header::{self};
@ -283,16 +283,15 @@ async fn llm_chat_inner(
};
// Serialize request for upstream BEFORE router consumes it
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
let client_request_bytes_for_upstream: Bytes = match serialize_for_upstream(&client_request, provider_id) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
// --- Phase 3: Route the request (or use pinned model from session cache) ---
let resolved_model = if let Some(cached_model) = pinned_model {

View file

@ -280,16 +280,31 @@ pub struct ConversationParam {
pub id: Option<String>,
}
/// Tool definitions
/// Tool definitions.
///
/// Supports both the canonical OpenAI Responses flat tool shape:
/// { "type": "function", "name": "...", "description": "...", "parameters": {...} }
/// and the nested chat-completions-compatible shape:
/// { "type": "function", "function": { "name": "...", "description": "...", "parameters": {...} } }
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Tool {
/// Function tool - flat structure in Responses API
/// Function tool — accepts both flat and nested `function` object shapes.
Function {
name: String,
/// Top-level name (flat shape).
name: Option<String>,
/// Top-level description (flat shape).
description: Option<String>,
/// Top-level parameters (flat shape).
parameters: Option<serde_json::Value>,
/// Top-level strict flag (flat shape).
strict: Option<bool>,
/// Nested `function` object (nested/compat shape).
///
/// When present, `name`/`description`/`parameters` from the outer level are
/// ignored in favour of the values inside this object.
#[serde(default, flatten)]
function: Option<FunctionDef>,
},
/// File search tool
FileSearch {
@ -321,6 +336,49 @@ pub enum Tool {
},
}
impl Tool {
    /// Returns the tool's name, if it has one.
    ///
    /// For `Function` tools the name inside the nested `function` definition
    /// takes precedence over the top-level `name`. `Custom` tools return their
    /// own `name`. Every other variant returns `None`.
    pub fn name(&self) -> Option<&str> {
        match self {
            Tool::Function { name, function, .. } => function
                .as_ref()
                .and_then(|f| f.name.as_deref())
                .or_else(|| name.as_deref()),
            Tool::Custom { name, .. } => name.as_deref(),
            _ => None,
        }
    }

    /// Returns the tool's description, if it has one.
    ///
    /// For `Function` tools the nested `function` definition is consulted first
    /// and the top-level `description` is only used as a fallback, matching the
    /// precedence used by [`Tool::name`]. `Custom` tools return their own
    /// `description`. Every other variant returns `None`.
    pub fn description(&self) -> Option<&String> {
        match self {
            Tool::Function {
                description,
                function,
                ..
            } => function
                .as_ref()
                .and_then(|f| f.description.as_ref())
                .or_else(|| description.as_ref()),
            Tool::Custom { description, .. } => description.as_ref(),
            _ => None,
        }
    }

    /// Returns the parameters value of a `Function` tool, if present.
    ///
    /// The nested `function` definition is consulted first and the top-level
    /// `parameters` is only used as a fallback, matching the precedence used
    /// by [`Tool::name`]. Every non-`Function` variant returns `None`.
    pub fn parameters(&self) -> Option<&serde_json::Value> {
        match self {
            Tool::Function {
                parameters,
                function,
                ..
            } => function
                .as_ref()
                .and_then(|f| f.parameters.as_ref())
                .or_else(|| parameters.as_ref()),
            _ => None,
        }
    }
}
/// Ranking options for file search
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -343,6 +401,16 @@ pub struct UserLocation {
pub timezone: Option<String>,
}
/// Inner function definition — used inside the nested `function` object.
///
/// Every field is optional; `#[skip_serializing_none]` leaves `None` fields
/// out of the serialized output instead of emitting `null`.
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDef {
/// Function name, when supplied.
pub name: Option<String>,
/// Human-readable description of the function, when supplied.
pub description: Option<String>,
/// Arbitrary JSON value describing the function's parameters
/// (presumably a JSON Schema object — TODO confirm against callers).
pub parameters: Option<serde_json::Value>,
/// Optional `strict` flag, carried through as given.
pub strict: Option<bool>,
}
/// Tool choice options
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
@ -1158,7 +1226,10 @@ impl ProviderRequest for ResponsesAPIRequest {
tools
.iter()
.filter_map(|tool| match tool {
Tool::Function { name, .. } => Some(name.clone()),
Tool::Function { name, function, .. } => function
.as_ref()
.and_then(|f| f.name.clone())
.or_else(|| name.clone()),
Tool::Custom {
name: Some(name), ..
} => Some(name.clone()),

View file

@ -11,6 +11,7 @@ pub use apis::streaming_shapes::sse::{SseEvent, SseStreamIter};
pub use aws_smithy_eventstream::frame::DecodedFrame;
pub use providers::id::ProviderId;
pub use providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use providers::request_adapter::serialize_for_upstream;
pub use providers::response::{
ProviderResponse, ProviderResponseError, ProviderResponseType, TokenUsage,
};

View file

@ -5,10 +5,12 @@
//!
pub mod id;
pub mod request;
pub mod request_adapter;
pub mod response;
pub mod streaming_response;
pub use id::ProviderId;
pub use request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use request_adapter::serialize_for_upstream;
pub use response::{ProviderResponse, ProviderResponseType, TokenUsage};
pub use streaming_response::{ProviderStreamResponse, ProviderStreamResponseType};

View file

@ -1015,6 +1015,54 @@ mod tests {
}
}
#[test]
fn test_normalize_for_upstream_chatgpt_sets_store_stream_and_wraps_input() {
    use crate::apis::openai::OpenAIApi::Responses;
    use crate::apis::openai_responses::InputParam;

    // Start from a non-streaming request with plain-text input and no `store`
    // value, so each field the normalizer is expected to touch begins in the
    // "wrong" state.
    let mut request = ProviderRequestType::ResponsesAPIRequest(ResponsesAPIRequest {
        model: "gpt-5.4".to_string(),
        input: InputParam::Text("Hello, Codex!".to_string()),
        temperature: None,
        max_output_tokens: Some(8192),
        stream: Some(false),
        metadata: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: None,
        instructions: None,
        modalities: None,
        user: None,
        store: None,
        reasoning_effort: None,
        include: None,
        audio: None,
        text: None,
        service_tier: None,
        top_p: None,
        top_logprobs: None,
        stream_options: None,
        truncation: None,
        conversation: None,
        previous_response_id: None,
        max_tool_calls: None,
        background: None,
    });

    let target_api = SupportedUpstreamAPIs::OpenAIResponsesAPI(Responses);
    request.normalize_for_upstream(ProviderId::ChatGPT, &target_api);

    let normalized = match request {
        ProviderRequestType::ResponsesAPIRequest(inner) => inner,
        _ => panic!("Expected ResponsesAPIRequest variant"),
    };

    // The token limit is untouched at this stage; only store/stream/input change.
    assert_eq!(normalized.max_output_tokens, Some(8192));
    assert_eq!(normalized.store, Some(false));
    assert_eq!(normalized.stream, Some(true));
    assert!(matches!(normalized.input, InputParam::Items(_)));
}
#[test]
fn test_chat_completions_to_responses_api_not_supported() {
use crate::apis::openai::OpenAIApi::Responses;

View file

@ -0,0 +1,407 @@
use crate::apis::openai_responses::ResponsesAPIRequest;
use crate::providers::id::ProviderId;
use crate::providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
/// Produce the bytes that are sent upstream for `request`.
///
/// Exactly one provider/request pairing is special-cased: a Responses API
/// request bound for [`ProviderId::ChatGPT`] is handed to
/// `adapt_chatgpt_responses_request`, which patches the serialized JSON:
/// - `max_output_tokens` is sent as `maxTokens`
/// - structured content arrays (`input_text`/`output_text` typed parts)
///   are flattened to plain text strings
///
/// Every other combination is serialized with plain `to_bytes()`.
///
/// # Errors
///
/// Returns a [`ProviderRequestError`] when serialization fails.
pub fn serialize_for_upstream(
    request: &ProviderRequestType,
    provider_id: ProviderId,
) -> Result<Vec<u8>, ProviderRequestError> {
    if let (ProviderId::ChatGPT, ProviderRequestType::ResponsesAPIRequest(responses_req)) =
        (provider_id, request)
    {
        return adapt_chatgpt_responses_request(responses_req);
    }

    request.to_bytes()
}
/// Serialize a Responses API request with the ChatGPT-specific wire patches.
///
/// The request is first converted to a `serde_json::Value` so keys can be
/// renamed and content restructured directly on the JSON tree, without a
/// dedicated set of serde types for the ChatGPT variant.
///
/// # Errors
///
/// Returns a [`ProviderRequestError`] if either the conversion to a JSON value
/// or the final byte serialization fails.
fn adapt_chatgpt_responses_request(
    req: &ResponsesAPIRequest,
) -> Result<Vec<u8>, ProviderRequestError> {
    let mut payload = match serde_json::to_value(req) {
        Ok(json) => json,
        Err(e) => {
            return Err(ProviderRequestError {
                message: format!("Failed to encode ChatGPT responses request as JSON value: {}", e),
                source: Some(Box::new(e)),
            });
        }
    };

    if let serde_json::Value::Object(fields) = &mut payload {
        // ChatGPT rejects `max_output_tokens`; it expects `maxTokens`.
        // The old key is always removed; the new one is only written for a
        // non-null value.
        match fields.remove("max_output_tokens") {
            Some(limit) if !limit.is_null() => {
                fields.insert("maxTokens".to_string(), limit);
            }
            _ => {}
        }

        // ChatGPT rejects structured content arrays with typed parts
        // (input_text, output_text); flatten them to plain text strings.
        flatten_input_content_parts(fields);
    }

    match serde_json::to_vec(&payload) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(ProviderRequestError {
            message: format!("Failed to serialize ChatGPT responses request for upstream: {}", e),
            source: Some(Box::new(e)),
        }),
    }
}
/// Walk through `input[].content` and collapse typed content-part arrays
/// into plain text strings that ChatGPT accepts.
///
/// A `content` array is replaced by a single string (the parts' `text` values
/// joined with `\n`) only when it is non-empty and *every* element is an
/// `input_text` or `output_text` part. Anything else is left untouched:
/// - `input` missing or not an array
/// - items without a `content` key, or whose `content` is not an array
/// - arrays containing any other kind of part; collapsing those would
///   silently discard the non-text parts from the request
fn flatten_input_content_parts(obj: &mut serde_json::Map<String, serde_json::Value>) {
    let input = match obj.get_mut("input").and_then(|v| v.as_array_mut()) {
        Some(arr) => arr,
        None => return,
    };

    for item in input {
        let content = match item.as_object_mut().and_then(|m| m.get_mut("content")) {
            Some(c) => c,
            None => continue,
        };

        let flattened = match content.as_array() {
            Some(parts) => join_text_only_parts(parts),
            None => continue,
        };

        // Even when all text parts are empty, we still need to collapse the array.
        // Leaving typed parts in-place causes ChatGPT Codex endpoints to reject them.
        if let Some(text) = flattened {
            *content = serde_json::Value::String(text);
        }
    }
}

/// Join the text of `parts` with `\n` when the slice is non-empty and consists
/// solely of text parts; return `None` otherwise.
fn join_text_only_parts(parts: &[serde_json::Value]) -> Option<String> {
    if parts.is_empty() {
        return None;
    }

    let texts = parts
        .iter()
        .map(text_of_text_part)
        .collect::<Option<Vec<&str>>>()?;

    Some(texts.join("\n"))
}

/// Return the `text` of an `input_text`/`output_text` part, or `None` when
/// `part` is any other kind of value. A text part whose `text` is missing or
/// not a string yields the empty string.
fn text_of_text_part(part: &serde_json::Value) -> Option<&str> {
    let part_obj = part.as_object()?;
    match part_obj.get("type").and_then(|v| v.as_str()) {
        Some("input_text") | Some("output_text") => Some(
            part_obj
                .get("text")
                .and_then(|v| v.as_str())
                .unwrap_or_default(),
        ),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::apis::openai::OpenAIApi;
    use crate::apis::openai_responses::{
        InputContent, InputItem, InputMessage, InputParam, MessageContent, MessageRole,
        ResponsesAPIRequest,
    };

    /// Build a streaming, non-stored Responses request with the given input
    /// and token limit; every other optional field is left unset except
    /// `instructions`.
    fn make_responses_request(
        input: InputParam,
        max_output_tokens: Option<i32>,
    ) -> ResponsesAPIRequest {
        ResponsesAPIRequest {
            model: "gpt-5.4".to_string(),
            input,
            temperature: None,
            max_output_tokens,
            stream: Some(true),
            metadata: None,
            tools: None,
            tool_choice: None,
            parallel_tool_calls: None,
            instructions: Some("You are Codex.".to_string()),
            modalities: None,
            user: None,
            store: Some(false),
            reasoning_effort: None,
            include: None,
            audio: None,
            text: None,
            service_tier: None,
            top_p: None,
            top_logprobs: None,
            stream_options: None,
            truncation: None,
            conversation: None,
            previous_response_id: None,
            max_tool_calls: None,
            background: None,
        }
    }

    /// Build a message item whose content is a list of `InputText` parts,
    /// one per entry in `texts`.
    fn text_parts_message(role: MessageRole, texts: &[&str]) -> InputItem {
        InputItem::Message(InputMessage {
            role,
            content: MessageContent::Items(
                texts
                    .iter()
                    .map(|t| InputContent::InputText {
                        text: (*t).to_string(),
                    })
                    .collect(),
            ),
        })
    }

    /// Serialize `request` for `provider_id` and parse the bytes back into JSON.
    fn wire_json(request: &ProviderRequestType, provider_id: ProviderId) -> serde_json::Value {
        let bytes = serialize_for_upstream(request, provider_id).unwrap();
        serde_json::from_slice(&bytes).unwrap()
    }

    /// Shorthand for `wire_json` on a Responses API request.
    fn responses_wire(req: ResponsesAPIRequest, provider_id: ProviderId) -> serde_json::Value {
        wire_json(&ProviderRequestType::ResponsesAPIRequest(req), provider_id)
    }

    // ---------------------------------------------------------------
    // max_output_tokens → maxTokens rename
    // ---------------------------------------------------------------

    #[test]
    fn chatgpt_renames_max_output_tokens_to_max_tokens_on_wire() {
        let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(8192));
        let wire = responses_wire(req, ProviderId::ChatGPT);

        assert!(
            wire.get("max_output_tokens").is_none(),
            "max_output_tokens should be absent from wire format"
        );
        assert_eq!(
            wire.get("maxTokens").and_then(|v| v.as_i64()),
            Some(8192),
            "maxTokens should be present with the original value"
        );
    }

    #[test]
    fn chatgpt_omits_max_tokens_when_max_output_tokens_is_none() {
        let req = make_responses_request(InputParam::Text("Hello".to_string()), None);
        let wire = responses_wire(req, ProviderId::ChatGPT);

        assert!(wire.get("max_output_tokens").is_none());
        assert!(
            wire.get("maxTokens").is_none(),
            "maxTokens should not appear when original was None"
        );
    }

    #[test]
    fn non_chatgpt_preserves_max_output_tokens_field_name() {
        let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(4096));
        let wire = responses_wire(req, ProviderId::OpenAI);

        assert_eq!(
            wire.get("max_output_tokens").and_then(|v| v.as_i64()),
            Some(4096)
        );
        assert!(wire.get("maxTokens").is_none());
    }

    // ---------------------------------------------------------------
    // input_text / output_text content flattening
    // ---------------------------------------------------------------

    #[test]
    fn chatgpt_flattens_input_text_content_parts_to_plain_string() {
        let input = InputParam::Items(vec![text_parts_message(
            MessageRole::User,
            &["first line", "second line"],
        )]);
        let wire = responses_wire(make_responses_request(input, None), ProviderId::ChatGPT);

        let content = &wire["input"][0]["content"];
        assert!(
            content.is_string(),
            "content should be flattened to a string, got: {}",
            content
        );
        assert_eq!(content.as_str().unwrap(), "first line\nsecond line");
    }

    // NOTE(review): despite the name, this fixture builds an `InputText` part on
    // an assistant message; no `output_text` part is constructed anywhere in this
    // module. Confirm whether a dedicated output-text variant should be used.
    #[test]
    fn chatgpt_flattens_output_text_content_parts() {
        let input = InputParam::Items(vec![text_parts_message(
            MessageRole::Assistant,
            &["assistant reply"],
        )]);
        let wire = responses_wire(make_responses_request(input, None), ProviderId::ChatGPT);

        let content = &wire["input"][0]["content"];
        assert!(content.is_string());
        assert_eq!(content.as_str().unwrap(), "assistant reply");
    }

    #[test]
    fn chatgpt_flattens_empty_input_text_content_parts() {
        let input = InputParam::Items(vec![text_parts_message(MessageRole::Assistant, &[""])]);
        let wire = responses_wire(make_responses_request(input, None), ProviderId::ChatGPT);

        let content = &wire["input"][0]["content"];
        assert!(
            content.is_string(),
            "content should be flattened to a string, got: {}",
            content
        );
        assert_eq!(content.as_str().unwrap(), "");
    }

    #[test]
    fn chatgpt_preserves_plain_text_content_unchanged() {
        let input = InputParam::Items(vec![InputItem::Message(InputMessage {
            role: MessageRole::User,
            content: MessageContent::Text("plain text message".to_string()),
        })]);
        let wire = responses_wire(make_responses_request(input, None), ProviderId::ChatGPT);

        let content = &wire["input"][0]["content"];
        assert_eq!(content.as_str().unwrap(), "plain text message");
    }

    #[test]
    fn non_chatgpt_does_not_flatten_content_parts() {
        let input = InputParam::Items(vec![text_parts_message(
            MessageRole::User,
            &["part one", "part two"],
        )]);
        let wire = responses_wire(make_responses_request(input, None), ProviderId::OpenAI);

        let content = &wire["input"][0]["content"];
        assert!(
            content.is_array(),
            "OpenAI should preserve array content, got: {}",
            content
        );
    }

    // ---------------------------------------------------------------
    // Both fixes together (realistic ChatGPT payload)
    // ---------------------------------------------------------------

    #[test]
    fn chatgpt_applies_both_fixes_together() {
        let input = InputParam::Items(vec![
            text_parts_message(MessageRole::User, &["Write a function"]),
            text_parts_message(MessageRole::Assistant, &["def hello(): pass"]),
            text_parts_message(MessageRole::User, &["Add a docstring"]),
        ]);
        let wire = responses_wire(
            make_responses_request(input, Some(16384)),
            ProviderId::ChatGPT,
        );

        // max_output_tokens renamed
        assert!(wire.get("max_output_tokens").is_none());
        assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(16384));

        // All content arrays flattened
        let items = wire["input"].as_array().unwrap();
        for (idx, entry) in items.iter().enumerate() {
            let content = &entry["content"];
            assert!(
                content.is_string(),
                "input[{}].content should be a string, got: {}",
                idx,
                content
            );
        }
    }

    // ---------------------------------------------------------------
    // Non-ResponsesAPI requests pass through unchanged
    // ---------------------------------------------------------------

    #[test]
    fn chatgpt_chat_completions_request_passes_through() {
        use crate::apis::openai::{ChatCompletionsRequest, Message, MessageContent as MC, Role};

        let user_message = Message {
            role: Role::User,
            content: Some(MC::Text("Hello".to_string())),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        };
        let chat_req = ChatCompletionsRequest {
            model: "gpt-5.4".to_string(),
            messages: vec![user_message],
            max_completion_tokens: Some(1024),
            ..Default::default()
        };

        let wire = wire_json(
            &ProviderRequestType::ChatCompletionsRequest(chat_req),
            ProviderId::ChatGPT,
        );

        assert_eq!(
            wire.get("max_completion_tokens").and_then(|v| v.as_i64()),
            Some(1024)
        );
    }

    // ---------------------------------------------------------------
    // Normalize + serialize round-trip (full pipeline test)
    // ---------------------------------------------------------------

    #[test]
    fn chatgpt_full_pipeline_normalize_then_serialize() {
        let req = make_responses_request(InputParam::Text("Hello, Codex!".to_string()), Some(8192));
        let upstream_api = crate::clients::endpoints::SupportedUpstreamAPIs::OpenAIResponsesAPI(
            OpenAIApi::Responses,
        );
        let mut request = ProviderRequestType::ResponsesAPIRequest(req);

        // normalize_for_upstream sets store=false, stream=true, wraps input in Items
        request.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api);

        // serialize_for_upstream then renames max_output_tokens and flattens content
        let wire = wire_json(&request, ProviderId::ChatGPT);

        assert!(wire.get("max_output_tokens").is_none());
        assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(8192));
        assert_eq!(wire.get("store"), Some(&serde_json::Value::Bool(false)));
        assert_eq!(wire.get("stream"), Some(&serde_json::Value::Bool(true)));
        assert!(
            wire["input"].is_array(),
            "input should be an array after normalize"
        );
    }
}

View file

@ -513,15 +513,27 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
description,
parameters,
strict,
} => converted_chat_tools.push(Tool {
tool_type: "function".to_string(),
function: crate::apis::openai::Function {
name,
description,
parameters: normalize_function_parameters(parameters, None),
strict,
},
}),
function,
} => {
let resolved_name = function
.as_ref()
.and_then(|f| f.name.clone())
.or_else(|| name.clone())
.unwrap_or_else(|| "".to_string());
let resolved_description = function
.as_ref()
.and_then(|f| f.description.clone())
.or_else(|| description.clone());
converted_chat_tools.push(Tool {
tool_type: "function".to_string(),
function: crate::apis::openai::Function {
name: resolved_name,
description: resolved_description,
parameters: normalize_function_parameters(parameters, None),
strict,
},
})
}
ResponsesTool::WebSearchPreview {
search_context_size,
user_location,
@ -803,10 +815,10 @@ impl TryFrom<ChatCompletionsRequest> for ConverseRequest {
.into_iter()
.map(|tool| BedrockTool::ToolSpec {
tool_spec: ToolSpecDefinition {
name: tool.function.name,
description: tool.function.description,
name: tool.function.name.clone(),
description: tool.function.description.clone(),
input_schema: ToolInputSchema {
json: tool.function.parameters,
json: tool.function.parameters.clone(),
},
},
})
@ -1349,7 +1361,7 @@ mod tests {
output: serde_json::json!({"status":"ok","stdout":"hello"}),
}]),
tools: Some(vec![ResponsesTool::Function {
name: "exec_command".to_string(),
name: Some("exec_command".to_string()),
description: Some("Execute a shell command".to_string()),
parameters: Some(serde_json::json!({
"type": "object",
@ -1358,6 +1370,7 @@ mod tests {
},
"required": ["cmd"]
})),
function: None,
strict: Some(false),
}]),
include: None,

View file

@ -27,7 +27,8 @@ use hermesllm::clients::endpoints::SupportedAPIsFromClient;
use hermesllm::providers::response::ProviderResponse;
use hermesllm::providers::streaming_response::ProviderStreamResponse;
use hermesllm::{
DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType,
serialize_for_upstream, DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType,
ProviderResponseType,
ProviderStreamResponseType,
};
@ -1105,7 +1106,9 @@ impl HttpContext for StreamContext {
);
return Action::Pause;
}
}
};
request_bytes
}
Err(e) => {
warn!(