remove port 9090 and consolidate to port 12000 for LLM routing

2026-06-23 15:38:07 +02:00 · 2025-05-16 17:10:37 -07:00 · 2025-05-16 17:10:37 -07:00 · 7feb168a06
commit 7feb168a06
parent ef65527ff0
5 changed files with 32 additions and 69 deletions
--- a/crates/brightstaff/src/handlers/chat_completions.rs
+++ b/crates/brightstaff/src/handlers/chat_completions.rs
@ -5,7 +5,6 @@ use common::api::open_ai::ChatCompletionsRequest;
 use common::consts::ARCH_PROVIDER_HINT_HEADER;
 use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full, StreamBody};
-use hyper::body::Body;
 use hyper::body::Frame;
 use hyper::header::{self};
 use hyper::{Request, Response, StatusCode};
@ -22,18 +21,11 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
        .boxed()
 }

-pub async fn chat_completion(
+pub async fn chat_completions(
    request: Request<hyper::body::Incoming>,
    router_service: Arc<RouterService>,
    llm_provider_endpoint: String,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
-    let max = request.body().size_hint().upper().unwrap_or(u64::MAX);
-    if max > 1024 * 1024 {
-        let error_msg = format!("Request body too large: {} bytes", max);
-        let mut too_large = Response::new(full(error_msg));
-        *too_large.status_mut() = StatusCode::PAYLOAD_TOO_LARGE;
-        return Ok(too_large);
-    }

    let mut request_headers = request.headers().clone();

--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@ -1,4 +1,4 @@
-use brightstaff::handlers::chat_completions::chat_completion;
+use brightstaff::handlers::chat_completions::chat_completions;
 use brightstaff::router::llm_router::RouterService;
 use bytes::Bytes;
 use common::configuration::Configuration;
@ -89,16 +89,23 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    );

    let llm_provider_endpoint = env::var("LLM_PROVIDER_ENDPOINT")
-        .unwrap_or_else(|_| "http://localhost:12000/v1/chat/completions".to_string());
+        .unwrap_or_else(|_| "http://localhost:12001/v1/chat/completions".to_string());

    info!("llm provider endpoint: {}", llm_provider_endpoint);
    info!("Listening on http://{}", bind_address);
    let listener = TcpListener::bind(bind_address).await?;

+
+    // If no routing section is configured (`routing` is `None`), fall back to "gpt-4o" as the model name.
+    let model = arch_config.routing.as_ref().map_or_else(
+        || "gpt-4o".to_string(),
+        |routing| routing.model.clone(),
+    );
+
    let router_service: Arc<RouterService> = Arc::new(RouterService::new(
        arch_config.llm_providers.clone(),
        llm_provider_endpoint.clone(),
-        arch_config.routing.as_ref().unwrap().model.clone(),
+        model,
    ));

    loop {
@ -123,7 +130,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
            async move {
                match (req.method(), req.uri().path()) {
                    (&Method::POST, "/v1/chat/completions") => {
-                        chat_completion(req, router_service, llm_provider_endpoint)
+                        chat_completions(req, router_service, llm_provider_endpoint)
                            .with_context(parent_cx)
                            .await
                    }
--- a/crates/brightstaff/src/router/llm_router.rs
+++ b/crates/brightstaff/src/router/llm_router.rs
@ -17,6 +17,7 @@ pub struct RouterService {
    client: reqwest::Client,
    router_model: Arc<dyn RouterModel>,
    routing_model_name: String,
+    llm_usage_defined: bool,
 }

 #[derive(Debug, Error)]
@ -73,6 +74,7 @@ impl RouterService {
            client: reqwest::Client::new(),
            router_model,
            routing_model_name,
+            llm_usage_defined: !providers_with_usage.is_empty(),
        }
    }

@ -81,6 +83,11 @@ impl RouterService {
        messages: &[Message],
        trace_parent: Option<String>,
    ) -> Result<Option<String>> {
+
+        if !self.llm_usage_defined {
+            return Ok(None);
+        }
+
        let router_request = self.router_model.generate_request(messages);

        info!(
--- a/crates/brightstaff/src/router/router_model_v1.rs
+++ b/crates/brightstaff/src/router/router_model_v1.rs
@ -82,6 +82,9 @@ impl RouterModel for RouterModelV1 {
    }

    fn parse_response(&self, content: &str) -> Result<Option<String>> {
+        if content.is_empty() {
+            return Ok(None);
+        }
        let router_resp_fixed = fix_json_response(content);
        info!(
            "router response (fixed): {}",
@ -226,6 +229,11 @@ fn test_parse_response() {
    let result = router.parse_response(input).unwrap();
    assert_eq!(result, None);

+    // Case 4.1: empty string
+    let input = r#""#;
+    let result = router.parse_response(input).unwrap();
+    assert_eq!(result, None);
+
    // Case 5: Malformed JSON
    let input = r#"{"route": "route1""#; // missing closing }
    let result = router.parse_response(input);