updating the implementation of /v1/chat/completions to use the generic provider interfaces (#548)

* updating the implementation of /v1/chat/completions to use the generic provider interfaces

* saving changes, although we will need a small re-factor after this as well

* more refactoring changes, getting close

* more refactoring changes to avoid unnecessary redirection and duplication

* more clean up

* more refactoring

* more refactoring to clean code and make stream_context.rs work

* removing unnecessary trait implementations

* some more clean-up

* fixed bugs

* fixing test cases, and making sure all references to the ChatCompletions* objects point to the new types

* refactored changes to support enum dispatch

* removed the dependency on try_streaming_from_bytes into a try_from trait implementation

* updated readme based on new usage

* updated code based on code review comments

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-2.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local>
This commit is contained in:
Salman Paracha 2025-08-20 12:55:29 -07:00 committed by GitHub
parent 1fdde8181a
commit 89ab51697a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 1044 additions and 972 deletions

View file

@ -3,7 +3,7 @@ use std::sync::Arc;
use bytes::Bytes;
use common::configuration::ModelUsagePreference;
use common::consts::ARCH_PROVIDER_HINT_HEADER;
use hermesllm::providers::openai::types::ChatCompletionsRequest;
use hermesllm::apis::openai::ChatCompletionsRequest;
use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Full, StreamBody};
use hyper::body::Frame;
@ -93,7 +93,7 @@ pub async fn chat_completions(
chat_completion_request.metadata.and_then(|metadata| {
metadata
.get("archgw_preference_config")
.and_then(|value| value.as_str().map(String::from))
.map(|value| value.to_string())
});
let usage_preferences: Option<Vec<ModelUsagePreference>> = usage_preferences_str
@ -105,9 +105,7 @@ pub async fn chat_completions(
.messages
.last()
.map_or("None".to_string(), |msg| {
msg.content.as_ref().map_or("None".to_string(), |content| {
content.to_string().replace('\n', "\\n")
})
msg.content.to_string().replace('\n', "\\n")
});
const MAX_MESSAGE_LENGTH: usize = 50;

View file

@ -1,6 +1,6 @@
use bytes::Bytes;
use common::configuration::{IntoModels, LlmProvider};
use hermesllm::providers::openai::types::Models;
use hermesllm::apis::openai::Models;
use http_body_util::{combinators::BoxBody, BodyExt, Full};
use hyper::{Response, StatusCode};
use serde_json;