# Patch summary (commentary only; this text sits before the first `diff --git`
# header and is not part of any hunk).
#
# crates/hermesllm/src/apis/openai.rs
#   * Adds a public `service_tier` field to `ChatCompletionsResponse`, placed
#     after `system_fingerprint`.
#   * Adds two unit tests. The first deserializes a payload that carries
#     `"service_tier": "default"` and asserts the field is
#     `Some("default".to_string())`. The second omits both `service_tier` and
#     `system_fingerprint` and asserts that both come back as `None`.
#
# crates/hermesllm/src/clients/transformer.rs
#   * The `TryFrom` conversion that builds a `ChatCompletionsResponse` now sets
#     `service_tier: None`.
#
# crates/llm_gateway/src/stream_context.rs
#   * Drops the blank line before the "Serialize the normalized response"
#     comment.
#   * In both the streaming and the non-streaming branch, the second argument
#     passed to `set_http_response_body` changes from `serialized_body.len()`
#     to `body.len()`. The "Pass-through" comment in the streaming branch is
#     removed.
#   NOTE(review): presumably the second argument is the length of the existing
#   body range being replaced, which is why the original `body` length is used
#   here - confirm against the proxy-wasm SDK.
#
# NOTE(review): generic parameters look stripped in this copy (`Vec,`,
# `Option,`, `Result =`, `TryFrom for`) and the original indentation was lost;
# the layout below is a best-effort reconstruction. Confirm against the
# repository before applying.
diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs
index bd9b3217..f2534961 100644
--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@@ -349,6 +349,7 @@ pub struct ChatCompletionsResponse {
     pub choices: Vec,
     pub usage: Usage,
     pub system_fingerprint: Option,
+    pub service_tier: Option,
 }
 
 /// Finish reason for completion
@@ -1160,4 +1161,84 @@ mod tests {
         let invalid_result: Result = serde_json::from_value(json!("invalid"));
         assert!(invalid_result.is_err());
     }
+
+    #[test]
+    fn test_chat_completions_response_with_service_tier() {
+        // Test that ChatCompletionsResponse can deserialize OpenAI responses with service_tier field
+        let json_response = r#"{
+            "id": "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2",
+            "object": "chat.completion",
+            "created": 1756574706,
+            "model": "gpt-4o-2024-08-06",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response content",
+                    "annotations": []
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 65,
+                "completion_tokens": 184,
+                "total_tokens": 249,
+                "prompt_tokens_details": {
+                    "cached_tokens": 0,
+                    "audio_tokens": 0
+                },
+                "completion_tokens_details": {
+                    "reasoning_tokens": 0,
+                    "audio_tokens": 0,
+                    "accepted_prediction_tokens": 0,
+                    "rejected_prediction_tokens": 0
+                }
+            },
+            "service_tier": "default",
+            "system_fingerprint": "fp_f33640a400"
+        }"#;
+
+        let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap();
+
+        assert_eq!(response.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2");
+        assert_eq!(response.object, "chat.completion");
+        assert_eq!(response.created, 1756574706);
+        assert_eq!(response.model, "gpt-4o-2024-08-06");
+        assert_eq!(response.service_tier, Some("default".to_string()));
+        assert_eq!(response.system_fingerprint, Some("fp_f33640a400".to_string()));
+        assert_eq!(response.choices.len(), 1);
+        assert_eq!(response.usage.prompt_tokens, 65);
+        assert_eq!(response.usage.completion_tokens, 184);
+        assert_eq!(response.usage.total_tokens, 249);
+    }
+
+    #[test]
+    fn test_chat_completions_response_without_service_tier() {
+        // Test that ChatCompletionsResponse can deserialize responses without service_tier (backward compatibility)
+        let json_response = r#"{
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response"
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 20,
+                "total_tokens": 30
+            }
+        }"#;
+
+        let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap();
+
+        assert_eq!(response.id, "chatcmpl-123");
+        assert_eq!(response.service_tier, None); // Should be None when not present
+        assert_eq!(response.system_fingerprint, None);
+    }
 }
diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs
index 0e8c3705..8170a53d 100644
--- a/crates/hermesllm/src/clients/transformer.rs
+++ b/crates/hermesllm/src/clients/transformer.rs
@@ -211,6 +211,7 @@ impl TryFrom for ChatCompletionsResponse {
             choices: vec![choice],
             usage,
             system_fingerprint: None,
+            service_tier: None,
         })
     }
 }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 6c0977f0..f4dc17c9 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -446,7 +446,6 @@ impl StreamContext {
         } else {
             warn!("No usage information found in response");
         }
-
         // Serialize the normalized response back to JSON bytes
         match serde_json::to_vec(&response) {
             Ok(bytes) => Ok(bytes),
@@ -743,8 +742,7 @@ impl HttpContext for StreamContext {
             if let Some(supported_api) = supported_api_opt {
                 match self.handle_streaming_response(&body, supported_api, provider_id) {
                     Ok(serialized_body) => {
-                        // Pass-through: let the original streaming response continue unchanged
-                        self.set_http_response_body(0, serialized_body.len(), &serialized_body);
+                        self.set_http_response_body(0, body.len(), &serialized_body);
                     }
                     Err(action) => return action,
                 }
@@ -755,7 +753,7 @@ impl HttpContext for StreamContext {
             if let Some(supported_api) = supported_api_opt {
                 match self.handle_non_streaming_response(&body, supported_api, provider_id) {
                     Ok(serialized_body) => {
-                        self.set_http_response_body(0, serialized_body.len(), &serialized_body);
+                        self.set_http_response_body(0, body.len(), &serialized_body);
                     }
                     Err(action) => return action,
                 }