add support for v1/messages and transformations (#558)

* pushing draft PR

* transformations are working. Now need to add some tests next

* updated tests and added necessary response transformations for Anthropics' message response object

* fixed bugs for integration tests

* fixed doc tests

* fixed serialization issues with enums on response

* adding some debug logs to help

* fixed issues with non-streaming responses

* updated the stream_context to update response bytes

* the serialized bytes length must be set in the response side

* fixed the debug statement that was causing the integration tests for wasm to fail

* fixing json parsing errors

* intentionally removing the headers

* making sure that we convert the raw bytes to the correct provider type upstream

* fixing non-streaming responses to tranform correctly

* /v1/messages works with transformations to and from /v1/chat/completions

* updating the CLI and demos to support anthropic vs. claude

* adding the anthropic key to the preference based routing tests

* fixed test cases and added more structured logs

* fixed integration tests and cleaned up logs

* added python client tests for anthropic and openai

* cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo

* fixing the tests. python dependency order was broken

* updated the openAI client to fix demos

* removed the raw response debug statement

* fixed the dup cloning issue and cleaned up the ProviderRequestType enum and traits

* fixing logs

* moved away from string literals to consts

* fixed streaming from Anthropic Client to OpenAI

* removed debug statement that would likely trip up integration tests

* fixed integration tests for llm_gateway

* cleaned up test cases and removed unnecessary crates

* fixing comments from PR

* fixed bug whereby we were sending an OpenAIChatCompletions request object to llm_gateway even though the request may have been AnthropicMessages

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-9.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-10.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-41.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>
This commit is contained in:
Salman Paracha 2025-09-10 07:40:30 -07:00 committed by GitHub
parent bb71d041a0
commit fb0581fd39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 2842 additions and 919 deletions

View file

@ -1,9 +1,10 @@
use brightstaff::handlers::chat_completions::chat_completions;
use brightstaff::handlers::chat_completions::chat;
use brightstaff::handlers::models::list_models;
use brightstaff::router::llm_router::RouterService;
use brightstaff::utils::tracing::init_tracer;
use bytes::Bytes;
use common::configuration::Configuration;
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH};
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
use hyper::body::Incoming;
use hyper::server::conn::http1;
@ -67,10 +68,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
&serde_json::to_string(arch_config.as_ref()).unwrap()
);
let llm_provider_endpoint = env::var("LLM_PROVIDER_ENDPOINT")
.unwrap_or_else(|_| "http://localhost:12001/v1/chat/completions".to_string());
let llm_provider_url = env::var("LLM_PROVIDER_ENDPOINT")
.unwrap_or_else(|_| "http://localhost:12001".to_string());
info!("llm provider endpoint: {}", llm_provider_endpoint);
info!("llm provider url: {}", llm_provider_url);
info!("listening on http://{}", bind_address);
let listener = TcpListener::bind(bind_address).await?;
@ -88,7 +89,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let router_service: Arc<RouterService> = Arc::new(RouterService::new(
arch_config.llm_providers.clone(),
llm_provider_endpoint.clone(),
llm_provider_url.clone() + CHAT_COMPLETIONS_PATH,
routing_model_name,
routing_llm_provider,
));
@ -99,19 +100,21 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let io = TokioIo::new(stream);
let router_service: Arc<RouterService> = Arc::clone(&router_service);
let llm_provider_endpoint = llm_provider_endpoint.clone();
let llm_provider_url = llm_provider_url.clone();
let llm_providers = llm_providers.clone();
let service = service_fn(move |req| {
let router_service = Arc::clone(&router_service);
let parent_cx = extract_context_from_request(&req);
let llm_provider_endpoint = llm_provider_endpoint.clone();
let llm_provider_url = llm_provider_url.clone();
let llm_providers = llm_providers.clone();
async move {
match (req.method(), req.uri().path()) {
(&Method::POST, "/v1/chat/completions") => {
chat_completions(req, router_service, llm_provider_endpoint)
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path());
chat(req, router_service, fully_qualified_url)
.with_context(parent_cx)
.await
}