Add support for v1/responses API (#622)

* making first commit. still need to work on streaming responses

* stream buffer implementation with tests

* adding Grok API keys to workflow

* addressed code review feedback

* adding support for Bedrock models

* fixed issues with translation to Claude Code

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
Salman Paracha authored 2025-12-03 14:58:26 -08:00, committed by GitHub
parent b01a81927d
commit a448c6e9cb
38 changed files with 7015 additions and 2955 deletions
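
The "stream buffer" bullet above refers to buffering partially received streaming responses before they can be translated between API formats. As a rough illustration only, here is a minimal Rust sketch of a buffer that reassembles server-sent events split across network chunks, assuming blank-line ("\n\n") SSE framing; every name and detail below is hypothetical, not the PR's actual implementation:

// Hypothetical stream buffer: holds bytes until a full SSE event
// (terminated by a blank line, "\n\n") has arrived.
struct StreamBuffer {
    pending: Vec<u8>, // bytes carried over between network chunks
}

impl StreamBuffer {
    fn new() -> Self {
        Self { pending: Vec::new() }
    }

    /// Append a chunk and drain every complete event it finishes.
    fn push(&mut self, chunk: &[u8]) -> Vec<String> {
        self.pending.extend_from_slice(chunk);
        let mut events = Vec::new();
        // Split off complete events; trailing partial bytes stay buffered.
        while let Some(pos) = self.pending.windows(2).position(|w| w == b"\n\n") {
            let event: Vec<u8> = self.pending.drain(..pos + 2).collect();
            events.push(String::from_utf8_lossy(&event).trim().to_string());
        }
        events
    }
}

fn main() {
    let mut buf = StreamBuffer::new();
    // An event split across two chunks is held until it completes.
    assert!(buf.push(b"data: {\"delta\":").is_empty());
    assert_eq!(buf.push(b"\"hi\"}\n\n"), vec!["data: {\"delta\":\"hi\"}"]);
}

Buffering like this matters for streaming because chunk boundaries can land mid-JSON, so events must be reassembled before any translation between API formats.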


@@ -3,7 +3,7 @@ use common::configuration::{ModelAlias, ModelUsagePreference};
 use common::consts::{ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER};
 use hermesllm::apis::openai::ChatCompletionsRequest;
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
-use hermesllm::clients::SupportedAPIs;
+use hermesllm::clients::SupportedAPIsFromClient;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full};
@@ -39,7 +39,7 @@ pub async fn router_chat(
     let mut client_request = match ProviderRequestType::try_from((
         &chat_request_bytes[..],
-        &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap(),
+        &SupportedAPIsFromClient::from_endpoint(request_path.as_str()).unwrap(),
     )) {
         Ok(request) => request,
         Err(err) => {
@@ -58,7 +58,7 @@ pub async fn router_chat(
     let resolved_model = if let Some(model_aliases) = model_aliases.as_ref() {
         if let Some(model_alias) = model_aliases.get(&model_from_request) {
             debug!(
-                "Model Alias: 'From {}' -> 'To{}'",
+                "Model Alias: 'From {}' -> 'To {}'",
                 model_from_request, model_alias.target
             );
             model_alias.target.clone()
@@ -91,10 +91,11 @@
         Ok(
             ProviderRequestType::MessagesRequest(_)
             | ProviderRequestType::BedrockConverse(_)
-            | ProviderRequestType::BedrockConverseStream(_),
+            | ProviderRequestType::BedrockConverseStream(_)
+            | ProviderRequestType::ResponsesAPIRequest(_),
         ) => {
             // This should not happen after conversion to OpenAI format
-            warn!("Unexpected: got MessagesRequest after converting to OpenAI format");
+            warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
             let err_msg = "Request conversion failed".to_string();
             let mut bad_request = Response::new(full(err_msg));
             *bad_request.status_mut() = StatusCode::BAD_REQUEST;
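
The hunk at -91,10 above shows that router_chat funnels every client request through ProviderRequestType, with one variant per upstream request shape. A minimal sketch of the try_from((bytes, api)) dispatch seen at -39,7, assuming serde_json deserialization; the type names mirror the diff, but all definitions below are illustrative, not hermesllm's actual code:

use serde::Deserialize;

#[derive(Deserialize)]
struct ChatCompletionsRequest {
    model: String,
}

#[derive(Deserialize)]
struct ResponsesAPIRequest {
    model: String,
}

// Which API the client-facing endpoint speaks.
enum SupportedAPIsFromClient {
    ChatCompletions,
    Responses,
}

// One variant per request shape the router can carry.
enum ProviderRequestType {
    ChatCompletionsRequest(ChatCompletionsRequest),
    ResponsesAPIRequest(ResponsesAPIRequest),
}

impl<'a> TryFrom<(&'a [u8], &'a SupportedAPIsFromClient)> for ProviderRequestType {
    type Error = serde_json::Error;

    // Deserialize the raw body into the request type the endpoint implies.
    fn try_from(
        (bytes, api): (&'a [u8], &'a SupportedAPIsFromClient),
    ) -> Result<Self, Self::Error> {
        match api {
            SupportedAPIsFromClient::ChatCompletions => {
                Ok(Self::ChatCompletionsRequest(serde_json::from_slice(bytes)?))
            }
            SupportedAPIsFromClient::Responses => {
                Ok(Self::ResponsesAPIRequest(serde_json::from_slice(bytes)?))
            }
        }
    }
}

fn main() {
    let body = br#"{"model":"example-model"}"#;
    let parsed =
        ProviderRequestType::try_from((&body[..], &SupportedAPIsFromClient::Responses)).unwrap();
    match parsed {
        ProviderRequestType::ResponsesAPIRequest(r) => assert_eq!(r.model, "example-model"),
        ProviderRequestType::ChatCompletionsRequest(_) => unreachable!(),
    }
}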


@@ -6,7 +6,7 @@ use brightstaff::router::llm_router::RouterService;
 use brightstaff::utils::tracing::init_tracer;
 use bytes::Bytes;
 use common::configuration::Configuration;
-use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH};
+use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
 use http_body_util::{combinators::BoxBody, BodyExt, Empty};
 use hyper::body::Incoming;
 use hyper::server::conn::http1;
@@ -123,7 +123,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
             async move {
                 match (req.method(), req.uri().path()) {
-                    (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
+                    (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
                         let fully_qualified_url =
                             format!("{}{}", llm_provider_url, req.uri().path());
                         router_chat(req, router_service, fully_qualified_url, model_aliases)
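
The new OPENAI_RESPONSES_API_PATH constant routes Responses API calls through the same POST handler as chat completions and messages. A sketch of how the path constants and the from_endpoint lookup used in router_chat could fit together, assuming the route is /v1/responses per the PR title; the other constant values and the Option return type are guesses, not the repository's definitions:

pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
pub const MESSAGES_PATH: &str = "/v1/messages";
pub const OPENAI_RESPONSES_API_PATH: &str = "/v1/responses";

#[derive(Debug, PartialEq)]
enum SupportedAPIsFromClient {
    ChatCompletions,
    Messages,
    Responses,
}

impl SupportedAPIsFromClient {
    // Map a client-facing request path to the API it speaks.
    fn from_endpoint(path: &str) -> Option<Self> {
        match path {
            CHAT_COMPLETIONS_PATH => Some(Self::ChatCompletions),
            MESSAGES_PATH => Some(Self::Messages),
            OPENAI_RESPONSES_API_PATH => Some(Self::Responses),
            _ => None,
        }
    }
}

fn main() {
    assert_eq!(
        SupportedAPIsFromClient::from_endpoint("/v1/responses"),
        Some(SupportedAPIsFromClient::Responses)
    );
}

Keeping one dispatch table for all three paths means the router's downstream logic stays uniform: every request, whatever its wire format, is normalized before model-alias resolution and provider selection.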