add support for v1/messages and transformations (#558)

* pushing draft PR * transformations are working. Now need to add some tests next * updated tests and added necessary response transformations for Anthropics' message response object * fixed bugs for integration tests * fixed doc tests * fixed serialization issues with enums on response * adding some debug logs to help * fixed issues with non-streaming responses * updated the stream_context to update response bytes * the serialized bytes length must be set in the response side * fixed the debug statement that was causing the integration tests for wasm to fail * fixing json parsing errors * intentionally removing the headers * making sure that we convert the raw bytes to the correct provider type upstream * fixing non-streaming responses to tranform correctly * /v1/messages works with transformations to and from /v1/chat/completions * updating the CLI and demos to support anthropic vs. claude * adding the anthropic key to the preference based routing tests * fixed test cases and added more structured logs * fixed integration tests and cleaned up logs * added python client tests for anthropic and openai * cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo * fixing the tests. python dependency order was broken * updated the openAI client to fix demos * removed the raw response debug statement * fixed the dup cloning issue and cleaned up the ProviderRequestType enum and traits * fixing logs * moved away from string literals to consts * fixed streaming from Anthropic Client to OpenAI * removed debug statement that would likely trip up integration tests * fixed integration tests for llm_gateway * cleaned up test cases and removed unnecessary crates * fixing comments from PR * fixed bug whereby we were sending an OpenAIChatCompletions request object to llm_gateway even though the request may have been AnthropicMessages --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-9.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-10.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-41.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>
2026-04-27 09:46:28 +02:00 · 2025-09-10 07:40:30 -07:00 · 2025-09-10 07:40:30 -07:00 · fb0581fd39
commit fb0581fd39
parent bb71d041a0
38 changed files with 2842 additions and 919 deletions
--- a/crates/hermesllm/src/lib.rs
+++ b/crates/hermesllm/src/lib.rs
@ -4,12 +4,16 @@
 pub mod providers;
 pub mod apis;
 pub mod clients;
-
 // Re-export important types and traits
 pub use providers::request::{ProviderRequestType, ProviderRequest, ProviderRequestError};
-pub use providers::response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, ProviderStreamResponseIter, ProviderResponseError, TokenUsage};
+pub use providers::response::{ProviderResponseType, ProviderStreamResponseType, ProviderResponse, ProviderStreamResponse, ProviderResponseError, TokenUsage, SseEvent, SseStreamIter};
 pub use providers::id::ProviderId;
-pub use providers::adapters::{has_compatible_api, supported_apis};
+
+
+//TODO: Refactor such that commons doesn't depend on Hermes. For now this will clean up strings
+pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const MESSAGES_PATH: &str = "/v1/messages";
+

 #[cfg(test)]
 mod tests {
@ -23,72 +27,51 @@ mod tests {
        assert_eq!(ProviderId::from("arch"), ProviderId::Arch);
    }

-    #[test]
-    fn test_provider_api_compatibility() {
-        assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions"));
-        assert!(!has_compatible_api(&ProviderId::OpenAI, "/v1/embeddings"));
-    }
-
-    #[test]
-    fn test_provider_supported_apis() {
-        let apis = supported_apis(&ProviderId::OpenAI);
-        assert!(apis.contains(&"/v1/chat/completions"));
-
-        // Test that provider supports the expected API endpoints
-        assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions"));
-    }
-
-    #[test]
-    fn test_provider_request_parsing() {
-        // Test with a sample JSON request
-        let json_request = r#"{
-            "model": "gpt-4",
-            "messages": [
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant"
-                },
-                {
-                    "role": "user",
-                    "content": "Hello!"
-                }
-            ]
-        }"#;
-
-        let result: Result<ProviderRequestType, std::io::Error> = ProviderRequestType::try_from(json_request.as_bytes());
-        assert!(result.is_ok());
-
-        let request = result.unwrap();
-        assert_eq!(request.model(), "gpt-4");
-        assert_eq!(request.get_recent_user_message(), Some("Hello!".to_string()));
-    }
-
    #[test]
    fn test_provider_streaming_response() {
        // Test streaming response parsing with sample SSE data
-        let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
+    let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}

-data: [DONE]
-"#;
+    data: [DONE]
+    "#;

-        let result = ProviderStreamResponseIter::try_from((sse_data.as_bytes(), &ProviderId::OpenAI));
-        assert!(result.is_ok());
+    use crate::clients::endpoints::SupportedAPIs;
+    let client_api = SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions);
+    let upstream_api =  SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions);

-        let mut streaming_response = result.unwrap();
+    // Test the new simplified architecture - create SseStreamIter directly
+    let sse_iter = SseStreamIter::try_from(sse_data.as_bytes());
+    assert!(sse_iter.is_ok());

-        // Test that we can iterate over chunks - it's just an iterator now!
-        let first_chunk = streaming_response.next();
-        assert!(first_chunk.is_some());
+    let mut streaming_iter = sse_iter.unwrap();

-        let chunk_result = first_chunk.unwrap();
-        assert!(chunk_result.is_ok());
+    // Test that we can iterate over SseEvents
+    let first_event = streaming_iter.next();
+    assert!(first_event.is_some());

-        let chunk = chunk_result.unwrap();
-        assert_eq!(chunk.content_delta(), Some("Hello"));
-        assert!(!chunk.is_final());
+    let sse_event = first_event.unwrap();

-        // Test that stream ends properly
-        let final_chunk = streaming_response.next();
-        assert!(final_chunk.is_none());
+    // Test SseEvent properties
+    assert!(!sse_event.is_done());
+    assert!(sse_event.data.as_ref().unwrap().contains("Hello"));
+
+    // Test that we can parse the event into a provider stream response
+    let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api));
+    if let Err(e) = &transformed_event {
+        println!("Transform error: {:?}", e);
+    }
+    assert!(transformed_event.is_ok());
+
+    let transformed_event = transformed_event.unwrap();
+    let provider_response = transformed_event.provider_response();
+    assert!(provider_response.is_ok());
+
+    let stream_response = provider_response.unwrap();
+    assert_eq!(stream_response.content_delta(), Some("Hello"));
+    assert!(!stream_response.is_final());
+
+    // Test that stream ends properly with [DONE] (SseStreamIter should stop before [DONE])
+    let final_event = streaming_iter.next();
+    assert!(final_event.is_none()); // Should be None because iterator stops at [DONE]
    }
 }