This commit is contained in:
Tom Stoffer 2026-06-05 18:32:17 +00:00 committed by GitHub
commit 153b789348
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1013 additions and 89 deletions

View file

@ -100,6 +100,16 @@ For library tests only:
cargo test --lib
```
**Build Rust artifacts with the correct targets:**
```bash
cd crates
cargo build --release --target wasm32-wasip1 -p llm_gateway -p prompt_gateway
cargo build --release -p brightstaff -p hermesllm -p common
```
Do not run a blanket workspace-native build such as `cargo build --release` from `crates/`. The `llm_gateway` and `prompt_gateway` crates are Proxy-WASM `cdylib`s and must be built for `wasm32-wasip1`, while `brightstaff`, `hermesllm`, and `common` build natively.
**Run Python CLI tests:**
```bash

View file

@ -471,10 +471,12 @@ def up(
else:
env_file_dict = load_env_file_to_dict(app_env_file)
for access_key in access_keys:
if env_file_dict.get(access_key) is None:
missing_keys.append(access_key)
else:
if env_file_dict.get(access_key) is not None:
env_stage[access_key] = env_file_dict[access_key]
elif env.get(access_key) is not None:
env_stage[access_key] = env.get(access_key)
else:
missing_keys.append(access_key)
if missing_keys:
_print_missing_keys(console, missing_keys)

View file

@ -5,7 +5,7 @@ use common::llm_providers::LlmProviders;
use hermesllm::apis::openai::Message;
use hermesllm::apis::openai_responses::InputParam;
use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
use hermesllm::{ProviderRequest, ProviderRequestType};
use hermesllm::{serialize_for_upstream, ProviderRequest, ProviderRequestType};
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::header::{self};
@ -284,7 +284,7 @@ async fn llm_chat_inner(
// Serialize request for upstream BEFORE router consumes it
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
match serialize_for_upstream(&client_request, provider_id) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");

View file

@ -126,16 +126,12 @@ pub enum InputParam {
pub enum InputItem {
/// Input message (role + content)
Message(InputMessage),
/// Item reference
ItemReference {
#[serde(rename = "type")]
item_type: String,
id: String,
},
/// Function call emitted by model in prior turn
FunctionCall {
#[serde(rename = "type")]
item_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<String>,
name: String,
arguments: String,
call_id: String,
@ -147,6 +143,18 @@ pub enum InputItem {
call_id: String,
output: serde_json::Value,
},
/// Item reference
///
/// Keep this after concrete item variants. Some Responses items include an
/// `id` plus additional required fields (`function_call` has `call_id`,
/// `name`, and `arguments`). With serde's untagged enum matching, placing
/// this broad reference shape first silently drops those fields and sends an
/// invalid upstream item.
ItemReference {
#[serde(rename = "type")]
item_type: String,
id: String,
},
}
/// Input message with role and content
@ -280,16 +288,31 @@ pub struct ConversationParam {
pub id: Option<String>,
}
/// Tool definitions
/// Tool definitions.
///
/// Supports both the canonical OpenAI Responses flat tool shape:
/// { "type": "function", "name": "...", "description": "...", "parameters": {...} }
/// and the nested chat-completions-compatible shape:
/// { "type": "function", "function": { "name": "...", "description": "...", "parameters": {...} } }
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Tool {
/// Function tool - flat structure in Responses API
/// Function tool — accepts both flat and nested `function` object shapes.
Function {
name: String,
/// Top-level name (flat shape).
name: Option<String>,
/// Top-level description (flat shape).
description: Option<String>,
/// Top-level parameters (flat shape).
parameters: Option<serde_json::Value>,
/// Top-level strict flag (flat shape).
strict: Option<bool>,
/// Nested `function` object (nested/compat shape).
///
/// When present, `name`/`description`/`parameters` from the outer level are
/// ignored in favour of the values inside this object.
#[serde(default, flatten)]
function: Option<FunctionDef>,
},
/// File search tool
FileSearch {
@ -321,6 +344,47 @@ pub enum Tool {
},
}
impl Tool {
pub fn name(&self) -> Option<&str> {
match self {
Tool::Function { name, function, .. } => function
.as_ref()
.and_then(|f| f.name.as_ref())
.map(|s| s.as_str())
.or_else(|| name.as_ref().map(|s| s.as_str())),
Tool::Custom { name, .. } => name.as_deref(),
_ => None,
}
}
pub fn description(&self) -> Option<&String> {
match self {
Tool::Function {
description,
function,
..
} => description
.as_ref()
.or_else(|| function.as_ref().and_then(|f| f.description.as_ref())),
Tool::Custom { description, .. } => description.as_ref(),
_ => None,
}
}
pub fn parameters(&self) -> Option<&serde_json::Value> {
match self {
Tool::Function {
parameters,
function,
..
} => parameters
.as_ref()
.or_else(|| function.as_ref().and_then(|f| f.parameters.as_ref())),
_ => None,
}
}
}
/// Ranking options for file search
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -343,6 +407,16 @@ pub struct UserLocation {
pub timezone: Option<String>,
}
/// Inner function definition — used inside the nested `function` object.
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDef {
pub name: Option<String>,
pub description: Option<String>,
pub parameters: Option<serde_json::Value>,
pub strict: Option<bool>,
}
/// Tool choice options
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
@ -1158,7 +1232,10 @@ impl ProviderRequest for ResponsesAPIRequest {
tools
.iter()
.filter_map(|tool| match tool {
Tool::Function { name, .. } => Some(name.clone()),
Tool::Function { name, function, .. } => function
.as_ref()
.and_then(|f| f.name.clone())
.or_else(|| name.clone()),
Tool::Custom {
name: Some(name), ..
} => Some(name.clone()),

View file

@ -1,5 +1,5 @@
use crate::apis::openai_responses::{
OutputItem, OutputItemStatus, Reasoning, ResponseStatus, ResponsesAPIResponse,
OutputContent, OutputItem, OutputItemStatus, Reasoning, ResponseStatus, ResponsesAPIResponse,
ResponsesAPIStreamEvent, TextConfig, TextFormat,
};
use crate::apis::streaming_shapes::sse::{SseEvent, SseStreamBufferTrait};
@ -14,6 +14,8 @@ fn event_to_sse(event: ResponsesAPIStreamEvent) -> SseEvent {
ResponsesAPIStreamEvent::ResponseCompleted { .. } => "response.completed",
ResponsesAPIStreamEvent::ResponseOutputItemAdded { .. } => "response.output_item.added",
ResponsesAPIStreamEvent::ResponseOutputItemDone { .. } => "response.output_item.done",
ResponsesAPIStreamEvent::ResponseContentPartAdded { .. } => "response.content_part.added",
ResponsesAPIStreamEvent::ResponseContentPartDone { .. } => "response.content_part.done",
ResponsesAPIStreamEvent::ResponseOutputTextDelta { .. } => "response.output_text.delta",
ResponsesAPIStreamEvent::ResponseOutputTextDone { .. } => "response.output_text.done",
ResponsesAPIStreamEvent::ResponseFunctionCallArgumentsDelta { .. } => {
@ -178,6 +180,22 @@ impl ResponsesAPIStreamBuffer {
event_to_sse(event)
}
/// Create content_part.added event for text
fn create_content_part_added_event(&mut self, output_index: i32, item_id: &str) -> SseEvent {
let event = ResponsesAPIStreamEvent::ResponseContentPartAdded {
item_id: item_id.to_string(),
output_index,
content_index: 0,
part: OutputContent::OutputText {
text: String::new(),
annotations: vec![],
logprobs: None,
},
sequence_number: self.next_sequence_number(),
};
event_to_sse(event)
}
/// Create output_item.added event for tool call
fn create_tool_call_added_event(
&mut self,
@ -291,44 +309,10 @@ impl ResponsesAPIStreamBuffer {
// Emit done events for all accumulated content
// Text content done events
let text_items: Vec<_> = self
.text_content
.iter()
.map(|(id, content)| (id.clone(), content.clone()))
.collect();
for (item_id, content) in text_items {
let output_index = self
.output_items_added
.iter()
.find(|(_, id)| **id == item_id)
.map(|(idx, _)| *idx)
.unwrap_or(0);
let seq1 = self.next_sequence_number();
let text_done_event = ResponsesAPIStreamEvent::ResponseOutputTextDone {
item_id: item_id.clone(),
output_index,
content_index: 0,
text: content.clone(),
logprobs: vec![],
sequence_number: seq1,
};
events.push(event_to_sse(text_done_event));
let seq2 = self.next_sequence_number();
let item_done_event = ResponsesAPIStreamEvent::ResponseOutputItemDone {
output_index,
item: OutputItem::Message {
id: item_id.clone(),
status: OutputItemStatus::Completed,
role: "assistant".to_string(),
content: vec![],
},
sequence_number: seq2,
};
events.push(event_to_sse(item_done_event));
}
// Text completion is represented in response.completed.output below.
// Avoid emitting optional per-item done events here: some proxy-wasm
// streaming paths truncate size-changing final chunks, and strict
// Responses clients only require the terminal completed response.
// Function call done events
let func_items: Vec<_> = self
@ -541,6 +525,7 @@ impl SseStreamBufferTrait for ResponsesAPIStreamBuffer {
self.output_items_added
.insert(*output_index, item_id.clone());
events.push(self.create_output_item_added_event(*output_index, &item_id));
events.push(self.create_content_part_added_event(*output_index, &item_id));
}
// Accumulate text content
@ -622,6 +607,26 @@ impl SseStreamBufferTrait for ResponsesAPIStreamBuffer {
}
events.push(event_to_sse(delta_event));
}
ResponsesAPIStreamEvent::ResponseCompleted { response, .. } => {
// Some upstream Responses API streams emit response.completed with an
// empty response.output even after text deltas were streamed. Clients
// such as OpenClaw may read the final response.output rather than the
// deltas, so synthesize our completed event from accumulated stream
// state while preserving upstream metadata like usage/service tier.
self.upstream_response_metadata = Some(response.clone());
self.finalize();
return;
}
ResponsesAPIStreamEvent::ResponseOutputItemAdded { .. }
| ResponsesAPIStreamEvent::ResponseContentPartAdded { .. }
| ResponsesAPIStreamEvent::ResponseOutputTextDone { .. }
| ResponsesAPIStreamEvent::ResponseContentPartDone { .. }
| ResponsesAPIStreamEvent::ResponseOutputItemDone { .. } => {
// We generate a canonical Responses lifecycle from deltas/finalize.
// Passing upstream lifecycle events through can create duplicate or
// out-of-order item/content IDs that strict clients may ignore.
return;
}
_ => {
// For other event types, just pass through with sequence number
let other_event = stream_event.as_ref().clone();
@ -637,7 +642,7 @@ impl SseStreamBufferTrait for ResponsesAPIStreamBuffer {
fn to_bytes(&mut self) -> Vec<u8> {
// For Responses API, we need special handling:
// - Most events are already in buffered_events from add_transformed_event
// - We should NOT finalize here - finalization happens when we detect [DONE] or end of stream
// - Finalization happens when we detect [DONE], response.completed, or transport end
// - Just flush the accumulated events and clear the buffer
// Convert all accumulated events to bytes and clear buffer
@ -715,14 +720,6 @@ mod tests {
output.contains("response.output_text.delta"),
"Should have text deltas"
);
assert!(
output.contains("response.output_text.done"),
"Should have text.done"
);
assert!(
output.contains("response.output_item.done"),
"Should have output_item.done"
);
assert!(
output.contains("response.completed"),
"Should have response.completed"
@ -731,8 +728,8 @@ mod tests {
println!("\nVALIDATION SUMMARY:");
println!("{}", "-".repeat(80));
println!("✓ Lifecycle events: response.created, response.in_progress, response.completed");
println!("✓ Output item lifecycle: output_item.added, output_item.done");
println!("✓ Text streaming: output_text.delta (2 deltas), output_text.done");
println!("✓ Output item lifecycle: output_item.added, response.completed output");
println!("✓ Text streaming: output_text.delta (2 deltas), response.completed output");
println!("✓ Complete transformation with finalization ([DONE] processed)\n");
}
@ -853,4 +850,42 @@ data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890
"response.completed should be emitted exactly once"
);
}
#[test]
fn test_responses_transport_end_finalizes_captured_gpt55_shape() {
// Captured from the OpenClaw GPT-5.5 repro: upstream emitted Responses
// text deltas, then the transport ended without an explicit [DONE] or
// response.completed event. The gateway must synthesize the terminal
// lifecycle so strict Responses clients can produce a visible result.
let raw_input = r#"event: response.output_item.added
data: {"type":"response.output_item.added","output_index":1,"item":{"type":"message","id":"msg_upstream","status":"in_progress","role":"assistant","content":[]},"sequence_number":2}
event: response.content_part.added
data: {"type":"response.content_part.added","item_id":"msg_upstream","output_index":1,"content_index":0,"part":{"type":"output_text","text":"","annotations":[],"logprobs":null},"sequence_number":3}
event: response.output_text.delta
data: {"type":"response.output_text.delta","item_id":"msg_upstream","output_index":1,"content_index":0,"delta":"PAR","logprobs":[],"sequence_number":4}
event: response.output_text.delta
data: {"type":"response.output_text.delta","item_id":"msg_upstream","output_index":1,"content_index":0,"delta":"IS","logprobs":[],"sequence_number":5}"#;
let client_api = SupportedAPIsFromClient::OpenAIResponsesAPI(OpenAIApi::Responses);
let upstream_api = SupportedUpstreamAPIs::OpenAIResponsesAPI(OpenAIApi::Responses);
let stream_iter = SseStreamIter::try_from(raw_input.as_bytes()).unwrap();
let mut buffer = ResponsesAPIStreamBuffer::new();
for raw_event in stream_iter {
let transformed_event =
SseEvent::try_from((raw_event, &client_api, &upstream_api)).unwrap();
buffer.add_transformed_event(transformed_event);
}
let partial_output = String::from_utf8_lossy(&buffer.to_bytes()).to_string();
assert!(partial_output.contains("response.output_text.delta"));
assert!(!partial_output.contains("response.completed"));
buffer.finalize();
let terminal_output = String::from_utf8_lossy(&buffer.to_bytes()).to_string();
assert!(terminal_output.contains("event: response.completed"));
assert!(terminal_output.contains("PARIS"));
}
}

View file

@ -38,6 +38,11 @@ pub trait SseStreamBufferTrait: Send + Sync {
/// # Returns
/// Bytes ready for wire transmission (may be empty if no events were accumulated)
fn to_bytes(&mut self) -> Vec<u8>;
/// Finalize a stream when the transport ends without an explicit provider
/// terminal event. Most buffers do not need this; Responses API buffering
/// uses it to synthesize *.done and response.completed lifecycle events.
fn finalize_stream(&mut self) {}
}
/// Unified SSE Stream Buffer enum that provides a zero-cost abstraction
@ -66,6 +71,12 @@ impl SseStreamBufferTrait for SseStreamBuffer {
Self::OpenAIResponses(buffer) => buffer.to_bytes(),
}
}
fn finalize_stream(&mut self) {
if let Self::OpenAIResponses(buffer) = self {
buffer.finalize();
}
}
}
// ============================================================================

View file

@ -237,6 +237,7 @@ providers:
- openai/gpt-5.4-nano-2026-03-17
- openai/gpt-5.4-nano
- openai/gpt-5.4-mini-2026-03-17
- openai/gpt-5.5
- openai/gpt-3.5-turbo-instruct
- openai/gpt-3.5-turbo-instruct-0914
- openai/gpt-3.5-turbo-1106
@ -331,6 +332,7 @@ providers:
- xiaomi/mimo-v2-omni
- xiaomi/mimo-v2-pro
chatgpt:
- chatgpt/gpt-5.5
- chatgpt/gpt-5.4
- chatgpt/gpt-5.3-codex
- chatgpt/gpt-5.2

View file

@ -11,6 +11,7 @@ pub use apis::streaming_shapes::sse::{SseEvent, SseStreamIter};
pub use aws_smithy_eventstream::frame::DecodedFrame;
pub use providers::id::ProviderId;
pub use providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use providers::request_adapter::serialize_for_upstream;
pub use providers::response::{
ProviderResponse, ProviderResponseError, ProviderResponseType, TokenUsage,
};

View file

@ -342,6 +342,15 @@ mod tests {
);
}
#[test]
fn test_chatgpt_models_include_gpt_5_5() {
let chatgpt_models = ProviderId::ChatGPT.models();
assert!(
chatgpt_models.iter().any(|model| model == "gpt-5.5"),
"ChatGPT models should include gpt-5.5"
);
}
#[test]
fn test_unsupported_providers_return_empty() {
// Providers without models should return empty vec

View file

@ -5,10 +5,12 @@
//!
pub mod id;
pub mod request;
pub mod request_adapter;
pub mod response;
pub mod streaming_response;
pub use id::ProviderId;
pub use request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
pub use request_adapter::serialize_for_upstream;
pub use response::{ProviderResponse, ProviderResponseType, TokenUsage};
pub use streaming_response::{ProviderStreamResponse, ProviderStreamResponseType};

View file

@ -1070,6 +1070,56 @@ mod tests {
}
}
#[test]
fn test_normalize_for_upstream_chatgpt_sets_store_stream_and_wraps_input() {
use crate::apis::openai::OpenAIApi::Responses;
use crate::apis::openai_responses::InputParam;
let responses_req = ResponsesAPIRequest {
model: "gpt-5.4".to_string(),
input: InputParam::Text("Hello, Codex!".to_string()),
temperature: None,
max_output_tokens: Some(8192),
stream: None,
metadata: None,
tools: None,
tool_choice: None,
parallel_tool_calls: None,
instructions: None,
modalities: None,
user: None,
store: None,
reasoning_effort: None,
include: None,
audio: None,
text: None,
service_tier: None,
top_p: None,
top_logprobs: None,
stream_options: None,
truncation: None,
conversation: None,
previous_response_id: None,
max_tool_calls: None,
background: None,
};
let upstream_api = SupportedUpstreamAPIs::OpenAIResponsesAPI(Responses);
let mut request = ProviderRequestType::ResponsesAPIRequest(responses_req);
request
.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api)
.expect("ChatGPT responses request should normalize");
match request {
ProviderRequestType::ResponsesAPIRequest(req) => {
assert_eq!(req.max_output_tokens, Some(8192));
assert_eq!(req.store, Some(false));
assert_eq!(req.stream, Some(true));
assert!(matches!(req.input, InputParam::Items(_)));
}
_ => panic!("Expected ResponsesAPIRequest variant"),
}
}
#[test]
fn test_chat_completions_to_responses_api_not_supported() {
use crate::apis::openai::OpenAIApi::Responses;

View file

@ -0,0 +1,633 @@
use crate::apis::openai_responses::ResponsesAPIRequest;
use crate::providers::id::ProviderId;
use crate::providers::request::{ProviderRequest, ProviderRequestError, ProviderRequestType};
/// Serialize a provider request for the upstream wire format.
///
/// For most providers this is plain `to_bytes()`. ChatGPT's native /responses
/// backend has wire-format quirks that require post-serialization patching:
/// - `max_output_tokens` must be sent as `maxTokens` for GPT-5.4-era models,
/// but omitted for GPT-5.5, which rejects `maxTokens`
/// - `truncation` must be omitted; ChatGPT Codex rejects it
/// - Structured content arrays (`input_text`/`output_text` typed parts)
/// must be flattened to plain text strings
pub fn serialize_for_upstream(
request: &ProviderRequestType,
provider_id: ProviderId,
) -> Result<Vec<u8>, ProviderRequestError> {
match (provider_id, request) {
(ProviderId::ChatGPT, ProviderRequestType::ResponsesAPIRequest(req)) => {
adapt_chatgpt_responses_request(req)
}
_ => request.to_bytes(),
}
}
/// Apply ChatGPT-specific wire-format fixes to a ResponsesAPI request.
///
/// Works at the JSON value level so we can rename keys and restructure
/// content without needing separate serde types for the ChatGPT variant.
fn adapt_chatgpt_responses_request(
req: &ResponsesAPIRequest,
) -> Result<Vec<u8>, ProviderRequestError> {
let mut value = serde_json::to_value(req).map_err(|e| ProviderRequestError {
message: format!(
"Failed to encode ChatGPT responses request as JSON value: {}",
e
),
source: Some(Box::new(e)),
})?;
if let Some(obj) = value.as_object_mut() {
let is_gpt_55 = obj
.get("model")
.and_then(|v| v.as_str())
.map(|model| model == "gpt-5.5" || model.starts_with("gpt-5.5-"))
.unwrap_or(false);
// ChatGPT rejects `max_output_tokens`. GPT-5.4-era Codex expects
// `maxTokens`, but GPT-5.5 rejects `maxTokens` too, so omit it there.
if let Some(max_output_tokens) = obj.remove("max_output_tokens") {
if !is_gpt_55 && !max_output_tokens.is_null() {
obj.insert("maxTokens".to_string(), max_output_tokens);
}
}
// ChatGPT Codex rejects this OpenAI Responses field.
obj.remove("truncation");
// ChatGPT rejects structured content arrays with typed parts
// (input_text, output_text); flatten them to plain text strings
flatten_input_content_parts(obj);
// ChatGPT does not persist output item references when store=false.
// OpenClaw uses store=false, so replayed hidden reasoning references
// must be dropped instead of sent back as `type=reasoning,id=rs_*`.
// The visible assistant/user transcript remains in the request.
remove_unstored_reasoning_input_refs(obj);
// ChatGPT requires remaining reasoning input items to carry a summary
// array. This covers stored conversations where reasoning refs are valid.
ensure_reasoning_input_summaries(obj);
}
serde_json::to_vec(&value).map_err(|e| ProviderRequestError {
message: format!(
"Failed to serialize ChatGPT responses request for upstream: {}",
e
),
source: Some(Box::new(e)),
})
}
/// Walk through `input[].content` and collapse typed content-part arrays
/// into plain text strings that ChatGPT accepts.
fn flatten_input_content_parts(obj: &mut serde_json::Map<String, serde_json::Value>) {
let input = match obj.get_mut("input").and_then(|v| v.as_array_mut()) {
Some(arr) => arr,
None => return,
};
for item in input {
let content = match item.as_object_mut().and_then(|m| m.get_mut("content")) {
Some(c) => c,
None => continue,
};
let parts = match content.as_array() {
Some(p) => p,
None => continue,
};
let mut saw_text_part = false;
let text = parts
.iter()
.filter_map(|part| {
let part_obj = part.as_object()?;
match part_obj.get("type").and_then(|v| v.as_str()) {
Some("input_text") | Some("output_text") => {
saw_text_part = true;
Some(
part_obj
.get("text")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string(),
)
}
_ => None,
}
})
.collect::<Vec<_>>()
.join("\n");
// Even when all text parts are empty, we still need to collapse the array.
// Leaving typed parts in-place causes ChatGPT Codex endpoints to reject them.
if saw_text_part {
*content = serde_json::Value::String(text);
}
}
}
fn remove_unstored_reasoning_input_refs(obj: &mut serde_json::Map<String, serde_json::Value>) {
if obj.get("store").and_then(|v| v.as_bool()) != Some(false) {
return;
}
let input = match obj.get_mut("input").and_then(|v| v.as_array_mut()) {
Some(arr) => arr,
None => return,
};
input.retain(|item| {
let Some(item) = item.as_object() else {
return true;
};
!(item.get("type").and_then(|v| v.as_str()) == Some("reasoning")
&& item.get("id").and_then(|v| v.as_str()).is_some())
});
}
fn ensure_reasoning_input_summaries(obj: &mut serde_json::Map<String, serde_json::Value>) {
let input = match obj.get_mut("input").and_then(|v| v.as_array_mut()) {
Some(arr) => arr,
None => return,
};
for item in input {
let item = match item.as_object_mut() {
Some(item) => item,
None => continue,
};
if item.get("type").and_then(|v| v.as_str()) == Some("reasoning")
&& !item.contains_key("summary")
{
item.insert("summary".to_string(), serde_json::Value::Array(Vec::new()));
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::apis::openai::OpenAIApi;
use crate::apis::openai_responses::{
InputContent, InputItem, InputMessage, InputParam, MessageContent, MessageRole,
ResponsesAPIRequest,
};
fn make_responses_request(
input: InputParam,
max_output_tokens: Option<i32>,
) -> ResponsesAPIRequest {
ResponsesAPIRequest {
model: "gpt-5.4".to_string(),
input,
temperature: None,
max_output_tokens,
stream: Some(true),
metadata: None,
tools: None,
tool_choice: None,
parallel_tool_calls: None,
instructions: Some("You are Codex.".to_string()),
modalities: None,
user: None,
store: Some(false),
reasoning_effort: None,
include: None,
audio: None,
text: None,
service_tier: None,
top_p: None,
top_logprobs: None,
stream_options: None,
truncation: None,
conversation: None,
previous_response_id: None,
max_tool_calls: None,
background: None,
}
}
// ---------------------------------------------------------------
// max_output_tokens → maxTokens rename
// ---------------------------------------------------------------
#[test]
fn chatgpt_renames_max_output_tokens_to_max_tokens_on_wire() {
let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(8192));
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(
wire.get("max_output_tokens").is_none(),
"max_output_tokens should be absent from wire format"
);
assert_eq!(
wire.get("maxTokens").and_then(|v| v.as_i64()),
Some(8192),
"maxTokens should be present with the original value"
);
}
#[test]
fn chatgpt_omits_max_tokens_when_max_output_tokens_is_none() {
let req = make_responses_request(InputParam::Text("Hello".to_string()), None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(wire.get("max_output_tokens").is_none());
assert!(
wire.get("maxTokens").is_none(),
"maxTokens should not appear when original was None"
);
}
#[test]
fn chatgpt_gpt55_omits_max_tokens_on_wire() {
let mut req = make_responses_request(InputParam::Text("Hello".to_string()), Some(8192));
req.model = "gpt-5.5".to_string();
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(wire.get("max_output_tokens").is_none());
assert!(
wire.get("maxTokens").is_none(),
"GPT-5.5 ChatGPT Codex rejects maxTokens, so it must be omitted"
);
}
#[test]
fn chatgpt_omits_truncation_on_wire() {
let mut req = make_responses_request(InputParam::Text("Hello".to_string()), None);
req.truncation = Some("disabled".to_string());
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(
wire.get("truncation").is_none(),
"ChatGPT Codex rejects truncation, so it must be omitted"
);
}
#[test]
fn non_chatgpt_preserves_max_output_tokens_field_name() {
let req = make_responses_request(InputParam::Text("Hello".to_string()), Some(4096));
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::OpenAI).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert_eq!(
wire.get("max_output_tokens").and_then(|v| v.as_i64()),
Some(4096)
);
assert!(wire.get("maxTokens").is_none());
}
// ---------------------------------------------------------------
// input_text / output_text content flattening
// ---------------------------------------------------------------
#[test]
fn chatgpt_flattens_input_text_content_parts_to_plain_string() {
let input = InputParam::Items(vec![InputItem::Message(InputMessage {
role: MessageRole::User,
content: MessageContent::Items(vec![
InputContent::InputText {
text: "first line".to_string(),
},
InputContent::InputText {
text: "second line".to_string(),
},
]),
})]);
let req = make_responses_request(input, None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let content = &wire["input"][0]["content"];
assert!(
content.is_string(),
"content should be flattened to a string, got: {}",
content
);
assert_eq!(content.as_str().unwrap(), "first line\nsecond line");
}
#[test]
fn chatgpt_flattens_output_text_content_parts() {
let input = InputParam::Items(vec![InputItem::Message(InputMessage {
role: MessageRole::Assistant,
content: MessageContent::Items(vec![InputContent::InputText {
text: "assistant reply".to_string(),
}]),
})]);
let req = make_responses_request(input, None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let content = &wire["input"][0]["content"];
assert!(content.is_string());
assert_eq!(content.as_str().unwrap(), "assistant reply");
}
#[test]
fn chatgpt_flattens_empty_input_text_content_parts() {
let input = InputParam::Items(vec![InputItem::Message(InputMessage {
role: MessageRole::Assistant,
content: MessageContent::Items(vec![InputContent::InputText {
text: "".to_string(),
}]),
})]);
let req = make_responses_request(input, None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let content = &wire["input"][0]["content"];
assert!(
content.is_string(),
"content should be flattened to a string, got: {}",
content
);
assert_eq!(content.as_str().unwrap(), "");
}
#[test]
fn chatgpt_preserves_plain_text_content_unchanged() {
let input = InputParam::Items(vec![InputItem::Message(InputMessage {
role: MessageRole::User,
content: MessageContent::Text("plain text message".to_string()),
})]);
let req = make_responses_request(input, None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let content = &wire["input"][0]["content"];
assert_eq!(content.as_str().unwrap(), "plain text message");
}
#[test]
fn non_chatgpt_does_not_flatten_content_parts() {
let input = InputParam::Items(vec![InputItem::Message(InputMessage {
role: MessageRole::User,
content: MessageContent::Items(vec![
InputContent::InputText {
text: "part one".to_string(),
},
InputContent::InputText {
text: "part two".to_string(),
},
]),
})]);
let req = make_responses_request(input, None);
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::OpenAI).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let content = &wire["input"][0]["content"];
assert!(
content.is_array(),
"OpenAI should preserve array content, got: {}",
content
);
}
// ---------------------------------------------------------------
// Reasoning item compatibility
// ---------------------------------------------------------------
#[test]
fn chatgpt_adds_empty_summary_to_stored_reasoning_input_items() {
let req: ResponsesAPIRequest = serde_json::from_value(serde_json::json!({
"model": "gpt-5.5",
"input": [
{"type": "reasoning", "id": "rs_123"},
{"role": "user", "content": "Are you there?"}
],
"store": true,
"stream": true
}))
.unwrap();
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert_eq!(
wire["input"][0]["summary"],
serde_json::Value::Array(Vec::new()),
"GPT-5.5 ChatGPT rejects reasoning input items without summary"
);
}
#[test]
fn chatgpt_drops_reasoning_item_references_when_store_false() {
let req: ResponsesAPIRequest = serde_json::from_value(serde_json::json!({
"model": "gpt-5.5",
"input": [
{"type": "reasoning", "id": "rs_123"},
{"role": "user", "content": "Are you there?"}
],
"store": false,
"stream": true
}))
.unwrap();
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
let input = wire["input"].as_array().unwrap();
assert_eq!(input.len(), 1);
assert_eq!(input[0]["content"], "Are you there?");
}
#[test]
fn chatgpt_preserves_function_call_fields_with_id() {
let req: ResponsesAPIRequest = serde_json::from_value(serde_json::json!({
"model": "gpt-5.5",
"input": [
{
"type": "function_call",
"id": "fc_123",
"call_id": "call_123",
"name": "exec",
"arguments": "{}"
},
{"type": "function_call_output", "call_id": "call_123", "output": "ok"},
{"role": "user", "content": "continue"}
],
"store": false,
"stream": true
}))
.unwrap();
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert_eq!(wire["input"][0]["type"], "function_call");
assert_eq!(wire["input"][0]["id"], "fc_123");
assert_eq!(wire["input"][0]["call_id"], "call_123");
assert_eq!(wire["input"][0]["name"], "exec");
assert_eq!(wire["input"][0]["arguments"], "{}");
}
#[test]
fn non_chatgpt_preserves_reasoning_item_reference_without_summary() {
let req: ResponsesAPIRequest = serde_json::from_value(serde_json::json!({
"model": "gpt-5.5",
"input": [
{"type": "reasoning", "id": "rs_123"},
{"role": "user", "content": "Are you there?"}
],
"stream": true
}))
.unwrap();
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::OpenAI).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(wire["input"][0].get("summary").is_none());
}
// ---------------------------------------------------------------
// Both fixes together (realistic ChatGPT payload)
// ---------------------------------------------------------------
#[test]
fn chatgpt_applies_both_fixes_together() {
let input = InputParam::Items(vec![
InputItem::Message(InputMessage {
role: MessageRole::User,
content: MessageContent::Items(vec![InputContent::InputText {
text: "Write a function".to_string(),
}]),
}),
InputItem::Message(InputMessage {
role: MessageRole::Assistant,
content: MessageContent::Items(vec![InputContent::InputText {
text: "def hello(): pass".to_string(),
}]),
}),
InputItem::Message(InputMessage {
role: MessageRole::User,
content: MessageContent::Items(vec![InputContent::InputText {
text: "Add a docstring".to_string(),
}]),
}),
]);
let req = make_responses_request(input, Some(16384));
let request = ProviderRequestType::ResponsesAPIRequest(req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
// max_output_tokens renamed
assert!(wire.get("max_output_tokens").is_none());
assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(16384));
// All content arrays flattened
for (i, item) in wire["input"].as_array().unwrap().iter().enumerate() {
let content = &item["content"];
assert!(
content.is_string(),
"input[{}].content should be a string, got: {}",
i,
content
);
}
}
// ---------------------------------------------------------------
// Non-ResponsesAPI requests pass through unchanged
// ---------------------------------------------------------------
#[test]
fn chatgpt_chat_completions_request_passes_through() {
use crate::apis::openai::{ChatCompletionsRequest, Message, MessageContent as MC, Role};
let chat_req = ChatCompletionsRequest {
model: "gpt-5.4".to_string(),
messages: vec![Message {
role: Role::User,
content: Some(MC::Text("Hello".to_string())),
name: None,
tool_calls: None,
tool_call_id: None,
}],
max_completion_tokens: Some(1024),
..Default::default()
};
let request = ProviderRequestType::ChatCompletionsRequest(chat_req);
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert_eq!(
wire.get("max_completion_tokens").and_then(|v| v.as_i64()),
Some(1024)
);
}
// ---------------------------------------------------------------
// Normalize + serialize round-trip (full pipeline test)
// ---------------------------------------------------------------
#[test]
fn chatgpt_full_pipeline_normalize_then_serialize() {
let input = InputParam::Text("Hello, Codex!".to_string());
let req = make_responses_request(input, Some(8192));
let upstream_api = crate::clients::endpoints::SupportedUpstreamAPIs::OpenAIResponsesAPI(
OpenAIApi::Responses,
);
let mut request = ProviderRequestType::ResponsesAPIRequest(req);
// normalize_for_upstream sets store=false, stream=true, wraps input in Items
request
.normalize_for_upstream(ProviderId::ChatGPT, &upstream_api)
.expect("ChatGPT responses request should normalize");
// serialize_for_upstream then renames max_output_tokens and flattens content
let bytes = serialize_for_upstream(&request, ProviderId::ChatGPT).unwrap();
let wire: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
assert!(wire.get("max_output_tokens").is_none());
assert_eq!(wire.get("maxTokens").and_then(|v| v.as_i64()), Some(8192));
assert_eq!(wire.get("store"), Some(&serde_json::Value::Bool(false)));
assert_eq!(wire.get("stream"), Some(&serde_json::Value::Bool(true)));
assert!(
wire["input"].is_array(),
"input should be an array after normalize"
);
}
}

View file

@ -196,6 +196,7 @@ impl TryFrom<ResponsesInputConverter> for Vec<Message> {
}
InputItem::FunctionCall {
item_type: _,
id: _,
name,
arguments,
call_id,
@ -513,15 +514,27 @@ impl TryFrom<ResponsesAPIRequest> for ChatCompletionsRequest {
description,
parameters,
strict,
} => converted_chat_tools.push(Tool {
tool_type: "function".to_string(),
function: crate::apis::openai::Function {
name,
description,
parameters: normalize_function_parameters(parameters, None),
strict,
},
}),
function,
} => {
let resolved_name = function
.as_ref()
.and_then(|f| f.name.clone())
.or_else(|| name.clone())
.unwrap_or_else(|| "".to_string());
let resolved_description = function
.as_ref()
.and_then(|f| f.description.clone())
.or_else(|| description.clone());
converted_chat_tools.push(Tool {
tool_type: "function".to_string(),
function: crate::apis::openai::Function {
name: resolved_name,
description: resolved_description,
parameters: normalize_function_parameters(parameters, None),
strict,
},
})
}
ResponsesTool::WebSearchPreview {
search_context_size,
user_location,
@ -803,10 +816,10 @@ impl TryFrom<ChatCompletionsRequest> for ConverseRequest {
.into_iter()
.map(|tool| BedrockTool::ToolSpec {
tool_spec: ToolSpecDefinition {
name: tool.function.name,
description: tool.function.description,
name: tool.function.name.clone(),
description: tool.function.description.clone(),
input_schema: ToolInputSchema {
json: tool.function.parameters,
json: tool.function.parameters.clone(),
},
},
})
@ -1349,7 +1362,7 @@ mod tests {
output: serde_json::json!({"status":"ok","stdout":"hello"}),
}]),
tools: Some(vec![ResponsesTool::Function {
name: "exec_command".to_string(),
name: Some("exec_command".to_string()),
description: Some("Execute a shell command".to_string()),
parameters: Some(serde_json::json!({
"type": "object",
@ -1358,6 +1371,7 @@ mod tests {
},
"required": ["cmd"]
})),
function: None,
strict: Some(false),
}]),
include: None,
@ -1410,6 +1424,7 @@ mod tests {
}),
InputItem::FunctionCall {
item_type: "function_call".to_string(),
id: Some("fc_abc123".to_string()),
name: "exec_command".to_string(),
arguments: "{\"cmd\":\"pwd\"}".to_string(),
call_id: "toolu_abc123".to_string(),

View file

@ -27,8 +27,8 @@ use hermesllm::clients::endpoints::SupportedAPIsFromClient;
use hermesllm::providers::response::ProviderResponse;
use hermesllm::providers::streaming_response::ProviderStreamResponse;
use hermesllm::{
DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType,
ProviderStreamResponseType,
serialize_for_upstream, DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType,
ProviderResponseType, ProviderStreamResponseType,
};
pub struct StreamContext {
@ -56,6 +56,7 @@ pub struct StreamContext {
http_protocol: Option<String>,
sse_buffer: Option<SseStreamBuffer>,
sse_chunk_processor: Option<SseChunkProcessor>,
deferred_responses_stream_body: Vec<u8>,
}
impl StreamContext {
@ -87,6 +88,7 @@ impl StreamContext {
http_protocol: None,
sse_buffer: None,
sse_chunk_processor: None,
deferred_responses_stream_body: Vec::new(),
}
}
@ -260,6 +262,18 @@ impl StreamContext {
self.set_http_request_header("content-length", None);
}
fn force_identity_response_encoding_for_streams(&mut self) {
// The WASM gateway parses upstream SSE bytes directly. If the client's
// Accept-Encoding (for example Python requests' default gzip/deflate/br)
// is forwarded upstream, ChatGPT Codex can return compressed SSE chunks;
// those are not parseable as SSE here and can surface as a clean HTTP 200
// with only lifecycle events reaching the client. Ask upstream for plain
// bytes whenever this filter is responsible for streaming transforms.
if self.streaming_response {
self.set_http_request_header("accept-encoding", Some("identity"));
}
}
fn save_ratelimit_header(&mut self) {
self.ratelimit_selector = self
.get_http_request_header(RATELIMIT_SELECTOR_HEADER_KEY)
@ -902,6 +916,7 @@ impl HttpContext for StreamContext {
}
self.delete_content_length_header();
self.force_identity_response_encoding_for_streams();
self.save_ratelimit_header();
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
@ -1082,14 +1097,15 @@ impl HttpContext for StreamContext {
);
return Action::Pause;
}
debug!(
"request_id={}: upstream request payload: {}",
self.request_identifier(),
String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
);
match request.to_bytes() {
Ok(bytes) => bytes,
match serialize_for_upstream(&request, self.get_provider_id()) {
Ok(bytes) => {
debug!(
"request_id={}: upstream request payload: {}",
self.request_identifier(),
String::from_utf8_lossy(&bytes)
);
bytes
}
Err(e) => {
warn!(
"request_id={}: failed to serialize request body: {}",
@ -1179,6 +1195,25 @@ impl HttpContext for StreamContext {
self.request_identifier(),
body_size
);
if self.streaming_response {
if let Some(buffer) = self.sse_buffer.as_mut() {
buffer.finalize_stream();
let bytes = buffer.to_bytes();
if matches!(
self.client_api,
Some(SupportedAPIsFromClient::OpenAIResponsesAPI(_))
) {
self.deferred_responses_stream_body
.extend_from_slice(&bytes);
let full_body = std::mem::take(&mut self.deferred_responses_stream_body);
if !full_body.is_empty() {
set_response_body_allow_growth(self, body_size, &full_body);
}
} else if !bytes.is_empty() {
self.set_http_response_body(0, body_size, &bytes);
}
}
}
self.handle_end_of_request_metrics_and_traces(current_time);
return Action::Continue;
}
@ -1243,7 +1278,37 @@ impl HttpContext for StreamContext {
if self.streaming_response {
match self.handle_streaming_response(&body, provider_id) {
Ok(serialized_body) => {
self.set_http_response_body(0, body_size, &serialized_body);
let terminal_bytes = if end_of_stream {
if let Some(buffer) = self.sse_buffer.as_mut() {
buffer.finalize_stream();
buffer.to_bytes()
} else {
Vec::new()
}
} else {
Vec::new()
};
if matches!(
self.client_api,
Some(SupportedAPIsFromClient::OpenAIResponsesAPI(_))
) {
self.deferred_responses_stream_body
.extend_from_slice(&serialized_body);
self.deferred_responses_stream_body
.extend_from_slice(&terminal_bytes);
if end_of_stream {
let full_body =
std::mem::take(&mut self.deferred_responses_stream_body);
set_response_body_allow_growth(self, body_size, &full_body);
} else {
self.set_http_response_body(0, body_size, &[]);
}
} else {
set_response_body_allow_growth(self, body_size, &serialized_body);
if !terminal_bytes.is_empty() {
self.set_http_response_body(serialized_body.len(), 0, &terminal_bytes);
}
}
}
Err(action) => return action,
}
@ -1260,6 +1325,17 @@ impl HttpContext for StreamContext {
}
}
fn set_response_body_allow_growth(ctx: &dyn HttpContext, original_size: usize, body: &[u8]) {
if body.len() <= original_size {
ctx.set_http_response_body(0, original_size, body);
return;
}
let (replacement, extra) = body.split_at(original_size);
ctx.set_http_response_body(0, original_size, replacement);
ctx.set_http_response_body(original_size, 0, extra);
}
fn current_time_ns() -> u128 {
SystemTime::now()
.duration_since(UNIX_EPOCH)

View file

@ -48,6 +48,7 @@ bash test_chatgpt.sh
## Available models
```
chatgpt/gpt-5.5
chatgpt/gpt-5.4
chatgpt/gpt-5.3-codex
chatgpt/gpt-5.2