diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs
index c2497472..b1d387e3 100644
--- a/crates/hermesllm/src/providers/response.rs
+++ b/crates/hermesllm/src/providers/response.rs
@@ -104,6 +104,19 @@ impl Iterator for ProviderStreamResponseIter {
         }
     }
 }
+
+// Helper to serialize only the inner struct, not the enum wrapper.
+// This avoids the problem where serde serializes the enum variant as a wrapper object in JSON.
+impl ProviderResponseType {
+    /// Serialize the response as JSON bytes, omitting the enum wrapper.
+    pub fn as_json_bytes(&self) -> Result<Vec<u8>, serde_json::Error> {
+        match self {
+            ProviderResponseType::ChatCompletionsResponse(resp) => serde_json::to_vec(resp),
+            ProviderResponseType::MessagesResponse(resp) => serde_json::to_vec(resp),
+        }
+    }
+}
+
 pub trait ProviderResponse: Send + Sync {
     /// Get usage information if available - returns dynamic trait object
     fn usage(&self) -> Option<&dyn TokenUsage>;
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index ed13d09a..b41396fd 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -681,7 +681,7 @@ impl HttpContext for StreamContext {
         match (supported_api, self.resolved_api.as_ref()) {
             (Some(supported_api), Some(_)) => {
                 match ProviderResponseType::try_from((&body[..], supported_api, &provider_id)) {
-                    Ok(response) => match serde_json::to_vec(&response) {
+                    Ok(response) => match response.as_json_bytes() {
                         Ok(bytes) => {
                             self.set_http_response_body(0, bytes.len(), &bytes);
                         }