mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
remove 9090 and consolidate to port 12000 for llm routing
This commit is contained in:
parent
ef65527ff0
commit
7feb168a06
5 changed files with 32 additions and 69 deletions
|
|
@ -5,7 +5,6 @@ use common::api::open_ai::ChatCompletionsRequest;
|
|||
use common::consts::ARCH_PROVIDER_HINT_HEADER;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::{BodyExt, Full, StreamBody};
|
||||
use hyper::body::Body;
|
||||
use hyper::body::Frame;
|
||||
use hyper::header::{self};
|
||||
use hyper::{Request, Response, StatusCode};
|
||||
|
|
@ -22,18 +21,11 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
|
|||
.boxed()
|
||||
}
|
||||
|
||||
pub async fn chat_completion(
|
||||
pub async fn chat_completions(
|
||||
request: Request<hyper::body::Incoming>,
|
||||
router_service: Arc<RouterService>,
|
||||
llm_provider_endpoint: String,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let max = request.body().size_hint().upper().unwrap_or(u64::MAX);
|
||||
if max > 1024 * 1024 {
|
||||
let error_msg = format!("Request body too large: {} bytes", max);
|
||||
let mut too_large = Response::new(full(error_msg));
|
||||
*too_large.status_mut() = StatusCode::PAYLOAD_TOO_LARGE;
|
||||
return Ok(too_large);
|
||||
}
|
||||
|
||||
let mut request_headers = request.headers().clone();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use brightstaff::handlers::chat_completions::chat_completion;
|
||||
use brightstaff::handlers::chat_completions::chat_completions;
|
||||
use brightstaff::router::llm_router::RouterService;
|
||||
use bytes::Bytes;
|
||||
use common::configuration::Configuration;
|
||||
|
|
@ -89,16 +89,23 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
);
|
||||
|
||||
let llm_provider_endpoint = env::var("LLM_PROVIDER_ENDPOINT")
|
||||
.unwrap_or_else(|_| "http://localhost:12000/v1/chat/completions".to_string());
|
||||
.unwrap_or_else(|_| "http://localhost:12001/v1/chat/completions".to_string());
|
||||
|
||||
info!("llm provider endpoint: {}", llm_provider_endpoint);
|
||||
info!("Listening on http://{}", bind_address);
|
||||
let listener = TcpListener::bind(bind_address).await?;
|
||||
|
||||
|
||||
// if routing is null then return gpt-4o as model name
|
||||
let model = arch_config.routing.as_ref().map_or_else(
|
||||
|| "gpt-4o".to_string(),
|
||||
|routing| routing.model.clone(),
|
||||
);
|
||||
|
||||
let router_service: Arc<RouterService> = Arc::new(RouterService::new(
|
||||
arch_config.llm_providers.clone(),
|
||||
llm_provider_endpoint.clone(),
|
||||
arch_config.routing.as_ref().unwrap().model.clone(),
|
||||
model,
|
||||
));
|
||||
|
||||
loop {
|
||||
|
|
@ -123,7 +130,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
async move {
|
||||
match (req.method(), req.uri().path()) {
|
||||
(&Method::POST, "/v1/chat/completions") => {
|
||||
chat_completion(req, router_service, llm_provider_endpoint)
|
||||
chat_completions(req, router_service, llm_provider_endpoint)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ pub struct RouterService {
|
|||
client: reqwest::Client,
|
||||
router_model: Arc<dyn RouterModel>,
|
||||
routing_model_name: String,
|
||||
llm_usage_defined: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
|
@ -73,6 +74,7 @@ impl RouterService {
|
|||
client: reqwest::Client::new(),
|
||||
router_model,
|
||||
routing_model_name,
|
||||
llm_usage_defined: !providers_with_usage.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -81,6 +83,11 @@ impl RouterService {
|
|||
messages: &[Message],
|
||||
trace_parent: Option<String>,
|
||||
) -> Result<Option<String>> {
|
||||
|
||||
if !self.llm_usage_defined {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let router_request = self.router_model.generate_request(messages);
|
||||
|
||||
info!(
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ impl RouterModel for RouterModelV1 {
|
|||
}
|
||||
|
||||
fn parse_response(&self, content: &str) -> Result<Option<String>> {
|
||||
if content.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let router_resp_fixed = fix_json_response(content);
|
||||
info!(
|
||||
"router response (fixed): {}",
|
||||
|
|
@ -226,6 +229,11 @@ fn test_parse_response() {
|
|||
let result = router.parse_response(input).unwrap();
|
||||
assert_eq!(result, None);
|
||||
|
||||
// Case 4.1: empty string
|
||||
let input = r#""#;
|
||||
let result = router.parse_response(input).unwrap();
|
||||
assert_eq!(result, None);
|
||||
|
||||
// Case 5: Malformed JSON
|
||||
let input = r#"{"route": "route1""#; // missing closing }
|
||||
let result = router.parse_response(input);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue