mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
refactor brightstaff (#736)
This commit is contained in:
parent
1f23c573bf
commit
1ad3e0f64e
30 changed files with 1802 additions and 1700 deletions
|
|
@ -1,28 +1,31 @@
|
|||
use brightstaff::handlers::agent_chat_completions::agent_chat;
|
||||
use brightstaff::app_state::AppState;
|
||||
use brightstaff::handlers::agents::orchestrator::agent_chat;
|
||||
use brightstaff::handlers::empty;
|
||||
use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
||||
use brightstaff::handlers::llm::llm_chat;
|
||||
use brightstaff::handlers::models::list_models;
|
||||
use brightstaff::handlers::routing_service::routing_decision;
|
||||
use brightstaff::router::llm_router::RouterService;
|
||||
use brightstaff::router::plano_orchestrator::OrchestratorService;
|
||||
use brightstaff::router::llm::RouterService;
|
||||
use brightstaff::router::orchestrator::OrchestratorService;
|
||||
use brightstaff::state::memory::MemoryConversationalStorage;
|
||||
use brightstaff::state::postgresql::PostgreSQLConversationStorage;
|
||||
use brightstaff::state::StateStorage;
|
||||
use brightstaff::utils::tracing::init_tracer;
|
||||
use brightstaff::tracing::init_tracer;
|
||||
use bytes::Bytes;
|
||||
use common::configuration::{
|
||||
Agent, Configuration, FilterPipeline, ListenerType, ResolvedFilterChain,
|
||||
};
|
||||
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
|
||||
use common::llm_providers::LlmProviders;
|
||||
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use hyper::body::Incoming;
|
||||
use hyper::header::HeaderValue;
|
||||
use hyper::server::conn::http1;
|
||||
use hyper::service::service_fn;
|
||||
use hyper::{Method, Request, Response, StatusCode};
|
||||
use hyper_util::rt::TokioIo;
|
||||
use opentelemetry::global;
|
||||
use opentelemetry::trace::FutureExt;
|
||||
use opentelemetry::{global, Context};
|
||||
use opentelemetry_http::HeaderExtractor;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -31,82 +34,94 @@ use tokio::net::TcpListener;
|
|||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
pub mod router;
|
||||
|
||||
const BIND_ADDRESS: &str = "0.0.0.0:9091";
|
||||
const DEFAULT_ROUTING_LLM_PROVIDER: &str = "arch-router";
|
||||
const DEFAULT_ROUTING_MODEL_NAME: &str = "Arch-Router";
|
||||
const DEFAULT_ORCHESTRATOR_LLM_PROVIDER: &str = "plano-orchestrator";
|
||||
const DEFAULT_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
|
||||
|
||||
// Utility function to extract the context from the incoming request headers
|
||||
fn extract_context_from_request(req: &Request<Incoming>) -> Context {
|
||||
global::get_text_map_propagator(|propagator| {
|
||||
propagator.extract(&HeaderExtractor(req.headers()))
|
||||
})
|
||||
/// CORS pre-flight response for the models endpoint.
|
||||
fn cors_preflight() -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let mut response = Response::new(empty());
|
||||
*response.status_mut() = StatusCode::NO_CONTENT;
|
||||
let h = response.headers_mut();
|
||||
h.insert("Allow", HeaderValue::from_static("GET, OPTIONS"));
|
||||
h.insert("Access-Control-Allow-Origin", HeaderValue::from_static("*"));
|
||||
h.insert(
|
||||
"Access-Control-Allow-Headers",
|
||||
HeaderValue::from_static("Authorization, Content-Type"),
|
||||
);
|
||||
h.insert(
|
||||
"Access-Control-Allow-Methods",
|
||||
HeaderValue::from_static("GET, POST, OPTIONS"),
|
||||
);
|
||||
h.insert("Content-Type", HeaderValue::from_static("application/json"));
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
fn empty() -> BoxBody<Bytes, hyper::Error> {
|
||||
Empty::<Bytes>::new()
|
||||
.map_err(|never| match never {})
|
||||
.boxed()
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration loading
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||
|
||||
// loading plano_config.yaml file (before tracing init so we can read tracing config)
|
||||
let plano_config_path = env::var("PLANO_CONFIG_PATH_RENDERED")
|
||||
/// Load and parse the YAML configuration file.
|
||||
///
|
||||
/// The path is read from `PLANO_CONFIG_PATH_RENDERED` (env) or falls back to
|
||||
/// `./plano_config_rendered.yaml`.
|
||||
fn load_config() -> Result<Configuration, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let path = env::var("PLANO_CONFIG_PATH_RENDERED")
|
||||
.unwrap_or_else(|_| "./plano_config_rendered.yaml".to_string());
|
||||
eprintln!("loading plano_config.yaml from {}", plano_config_path);
|
||||
eprintln!("loading plano_config.yaml from {}", path);
|
||||
|
||||
let config_contents =
|
||||
fs::read_to_string(&plano_config_path).expect("Failed to read plano_config.yaml");
|
||||
let contents = fs::read_to_string(&path).map_err(|e| format!("failed to read {path}: {e}"))?;
|
||||
|
||||
let config: Configuration =
|
||||
serde_yaml::from_str(&config_contents).expect("Failed to parse plano_config.yaml");
|
||||
serde_yaml::from_str(&contents).map_err(|e| format!("failed to parse {path}: {e}"))?;
|
||||
|
||||
// Initialize tracing using config.yaml tracing section
|
||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||
info!(path = %plano_config_path, "loaded plano_config.yaml");
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
let plano_config = Arc::new(config);
|
||||
// ---------------------------------------------------------------------------
|
||||
// Application state initialization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// combine agents and filters into a single list of agents
|
||||
let all_agents: Vec<Agent> = plano_config
|
||||
/// Build the shared [`AppState`] from a parsed [`Configuration`].
|
||||
async fn init_app_state(
|
||||
config: &Configuration,
|
||||
) -> Result<AppState, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let llm_provider_url =
|
||||
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
|
||||
|
||||
// Combine agents and filters into a single list
|
||||
let all_agents: Vec<Agent> = config
|
||||
.agents
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.iter()
|
||||
.chain(plano_config.filters.as_deref().unwrap_or_default())
|
||||
.chain(config.filters.as_deref().unwrap_or_default())
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
// Build global agent map for resolving filter chain references
|
||||
let global_agent_map: HashMap<String, Agent> = all_agents
|
||||
.iter()
|
||||
.map(|a| (a.id.clone(), a.clone()))
|
||||
.collect();
|
||||
|
||||
// Create expanded provider list for /v1/models endpoint
|
||||
let llm_providers = LlmProviders::try_from(plano_config.model_providers.clone())
|
||||
.expect("Failed to create LlmProviders");
|
||||
let llm_providers = Arc::new(RwLock::new(llm_providers));
|
||||
let combined_agents_filters_list = Arc::new(RwLock::new(Some(all_agents)));
|
||||
let llm_providers = LlmProviders::try_from(config.model_providers.clone())
|
||||
.map_err(|e| format!("failed to create LlmProviders: {e}"))?;
|
||||
|
||||
// Resolve model listener filter chain and agents at startup
|
||||
let model_listener_count = plano_config
|
||||
let model_listener_count = config
|
||||
.listeners
|
||||
.iter()
|
||||
.filter(|l| l.listener_type == ListenerType::Model)
|
||||
.count();
|
||||
assert!(
|
||||
model_listener_count <= 1,
|
||||
"only one model listener is allowed, found {}",
|
||||
model_listener_count
|
||||
);
|
||||
let model_listener = plano_config
|
||||
if model_listener_count > 1 {
|
||||
return Err(format!(
|
||||
"only one model listener is allowed, found {}",
|
||||
model_listener_count
|
||||
)
|
||||
.into());
|
||||
}
|
||||
let model_listener = config
|
||||
.listeners
|
||||
.iter()
|
||||
.find(|l| l.listener_type == ListenerType::Model);
|
||||
|
|
@ -114,7 +129,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
filter_ids.map(|ids| {
|
||||
let agents = ids
|
||||
.iter()
|
||||
.filter_map(|id| global_agent_map.get(id).map(|a| (id.clone(), a.clone())))
|
||||
.filter_map(|id| {
|
||||
global_agent_map
|
||||
.get(id)
|
||||
.map(|a: &Agent| (id.clone(), a.clone()))
|
||||
})
|
||||
.collect();
|
||||
ResolvedFilterChain {
|
||||
filter_ids: ids,
|
||||
|
|
@ -126,14 +145,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
input: resolve_chain(model_listener.and_then(|l| l.input_filters.clone())),
|
||||
output: resolve_chain(model_listener.and_then(|l| l.output_filters.clone())),
|
||||
});
|
||||
let listeners = Arc::new(RwLock::new(plano_config.listeners.clone()));
|
||||
let llm_provider_url =
|
||||
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
|
||||
|
||||
let listener = TcpListener::bind(bind_address).await?;
|
||||
let overrides = plano_config.overrides.clone().unwrap_or_default();
|
||||
let overrides = config.overrides.clone().unwrap_or_default();
|
||||
|
||||
// Strip provider prefix (e.g. "arch/") to get the model ID used in upstream requests
|
||||
let routing_model_name: String = overrides
|
||||
.llm_routing_model
|
||||
.as_deref()
|
||||
|
|
@ -141,21 +155,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
.unwrap_or(DEFAULT_ROUTING_MODEL_NAME)
|
||||
.to_string();
|
||||
|
||||
let routing_llm_provider = plano_config
|
||||
let routing_llm_provider = config
|
||||
.model_providers
|
||||
.iter()
|
||||
.find(|p| p.model.as_deref() == Some(routing_model_name.as_str()))
|
||||
.map(|p| p.name.clone())
|
||||
.unwrap_or_else(|| DEFAULT_ROUTING_LLM_PROVIDER.to_string());
|
||||
|
||||
let router_service: Arc<RouterService> = Arc::new(RouterService::new(
|
||||
plano_config.model_providers.clone(),
|
||||
let router_service = Arc::new(RouterService::new(
|
||||
config.model_providers.clone(),
|
||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||
routing_model_name,
|
||||
routing_llm_provider,
|
||||
));
|
||||
|
||||
// Strip provider prefix (e.g. "arch/") to get the model ID used in upstream requests
|
||||
let orchestrator_model_name: String = overrides
|
||||
.agent_orchestration_model
|
||||
.as_deref()
|
||||
|
|
@ -163,213 +176,205 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
.unwrap_or(DEFAULT_ORCHESTRATOR_MODEL_NAME)
|
||||
.to_string();
|
||||
|
||||
let orchestrator_llm_provider: String = plano_config
|
||||
let orchestrator_llm_provider: String = config
|
||||
.model_providers
|
||||
.iter()
|
||||
.find(|p| p.model.as_deref() == Some(orchestrator_model_name.as_str()))
|
||||
.map(|p| p.name.clone())
|
||||
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
|
||||
|
||||
let orchestrator_service: Arc<OrchestratorService> = Arc::new(OrchestratorService::new(
|
||||
let orchestrator_service = Arc::new(OrchestratorService::new(
|
||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||
orchestrator_model_name,
|
||||
orchestrator_llm_provider,
|
||||
));
|
||||
|
||||
let model_aliases = Arc::new(plano_config.model_aliases.clone());
|
||||
let span_attributes = Arc::new(
|
||||
plano_config
|
||||
.tracing
|
||||
.as_ref()
|
||||
.and_then(|tracing| tracing.span_attributes.clone()),
|
||||
);
|
||||
let state_storage = init_state_storage(config).await?;
|
||||
|
||||
// Initialize trace collector and start background flusher
|
||||
// Tracing is enabled if the tracing config is present in plano_config.yaml
|
||||
// Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED
|
||||
// OpenTelemetry automatic instrumentation is configured in utils/tracing.rs
|
||||
let span_attributes = config
|
||||
.tracing
|
||||
.as_ref()
|
||||
.and_then(|tracing| tracing.span_attributes.clone());
|
||||
|
||||
// Initialize conversation state storage for v1/responses
|
||||
// Configurable via plano_config.yaml state_storage section
|
||||
// If not configured, state management is disabled
|
||||
// Environment variables are substituted by envsubst before config is read
|
||||
let state_storage: Option<Arc<dyn StateStorage>> =
|
||||
if let Some(storage_config) = &plano_config.state_storage {
|
||||
let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
|
||||
common::configuration::StateStorageType::Memory => {
|
||||
info!(
|
||||
storage_type = "memory",
|
||||
"initialized conversation state storage"
|
||||
);
|
||||
Arc::new(MemoryConversationalStorage::new())
|
||||
}
|
||||
common::configuration::StateStorageType::Postgres => {
|
||||
let connection_string = storage_config
|
||||
.connection_string
|
||||
.as_ref()
|
||||
.expect("connection_string is required for postgres state_storage");
|
||||
Ok(AppState {
|
||||
router_service,
|
||||
orchestrator_service,
|
||||
model_aliases: config.model_aliases.clone(),
|
||||
llm_providers: Arc::new(RwLock::new(llm_providers)),
|
||||
agents_list: Some(all_agents),
|
||||
listeners: config.listeners.clone(),
|
||||
state_storage,
|
||||
llm_provider_url,
|
||||
span_attributes,
|
||||
http_client: reqwest::Client::new(),
|
||||
filter_pipeline,
|
||||
})
|
||||
}
|
||||
|
||||
debug!(connection_string = %connection_string, "postgres connection");
|
||||
info!(
|
||||
storage_type = "postgres",
|
||||
"initializing conversation state storage"
|
||||
);
|
||||
Arc::new(
|
||||
PostgreSQLConversationStorage::new(connection_string.clone())
|
||||
.await
|
||||
.expect("Failed to initialize Postgres state storage"),
|
||||
)
|
||||
}
|
||||
};
|
||||
Some(storage)
|
||||
} else {
|
||||
info!("no state_storage configured, conversation state management disabled");
|
||||
None
|
||||
};
|
||||
/// Initialize the conversation state storage backend (if configured).
|
||||
async fn init_state_storage(
|
||||
config: &Configuration,
|
||||
) -> Result<Option<Arc<dyn StateStorage>>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let Some(storage_config) = &config.state_storage else {
|
||||
info!("no state_storage configured, conversation state management disabled");
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
loop {
|
||||
let (stream, _) = listener.accept().await?;
|
||||
let peer_addr = stream.peer_addr()?;
|
||||
let io = TokioIo::new(stream);
|
||||
let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
|
||||
common::configuration::StateStorageType::Memory => {
|
||||
info!(
|
||||
storage_type = "memory",
|
||||
"initialized conversation state storage"
|
||||
);
|
||||
Arc::new(MemoryConversationalStorage::new())
|
||||
}
|
||||
common::configuration::StateStorageType::Postgres => {
|
||||
let connection_string = storage_config
|
||||
.connection_string
|
||||
.as_ref()
|
||||
.ok_or("connection_string is required for postgres state_storage")?;
|
||||
|
||||
let router_service: Arc<RouterService> = Arc::clone(&router_service);
|
||||
let orchestrator_service: Arc<OrchestratorService> = Arc::clone(&orchestrator_service);
|
||||
let model_aliases: Arc<
|
||||
Option<std::collections::HashMap<String, common::configuration::ModelAlias>>,
|
||||
> = Arc::clone(&model_aliases);
|
||||
let llm_provider_url = llm_provider_url.clone();
|
||||
debug!(connection_string = %connection_string, "postgres connection");
|
||||
info!(
|
||||
storage_type = "postgres",
|
||||
"initializing conversation state storage"
|
||||
);
|
||||
|
||||
let llm_providers = llm_providers.clone();
|
||||
let agents_list = combined_agents_filters_list.clone();
|
||||
let filter_pipeline = filter_pipeline.clone();
|
||||
let listeners = listeners.clone();
|
||||
let span_attributes = span_attributes.clone();
|
||||
let state_storage = state_storage.clone();
|
||||
let service = service_fn(move |req| {
|
||||
let router_service = Arc::clone(&router_service);
|
||||
let orchestrator_service = Arc::clone(&orchestrator_service);
|
||||
let parent_cx = extract_context_from_request(&req);
|
||||
let llm_provider_url = llm_provider_url.clone();
|
||||
let llm_providers = llm_providers.clone();
|
||||
let model_aliases = Arc::clone(&model_aliases);
|
||||
let agents_list = agents_list.clone();
|
||||
let filter_pipeline = filter_pipeline.clone();
|
||||
let listeners = listeners.clone();
|
||||
let span_attributes = span_attributes.clone();
|
||||
let state_storage = state_storage.clone();
|
||||
Arc::new(
|
||||
PostgreSQLConversationStorage::new(connection_string.clone())
|
||||
.await
|
||||
.map_err(|e| format!("failed to initialize Postgres state storage: {e}"))?,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
async move {
|
||||
let path = req.uri().path().to_string();
|
||||
// Check if path starts with /agents
|
||||
if path.starts_with("/agents") {
|
||||
// Check if it matches one of the agent API paths
|
||||
let stripped_path = path.strip_prefix("/agents").unwrap();
|
||||
if matches!(
|
||||
stripped_path,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
let fully_qualified_url = format!("{}{}", llm_provider_url, stripped_path);
|
||||
return agent_chat(
|
||||
req,
|
||||
orchestrator_service,
|
||||
fully_qualified_url,
|
||||
agents_list,
|
||||
listeners,
|
||||
span_attributes,
|
||||
llm_providers,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
if let Some(stripped_path) = path.strip_prefix("/routing") {
|
||||
let stripped_path = stripped_path.to_string();
|
||||
if matches!(
|
||||
stripped_path.as_str(),
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
return routing_decision(
|
||||
req,
|
||||
router_service,
|
||||
stripped_path,
|
||||
span_attributes,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
match (req.method(), path.as_str()) {
|
||||
(
|
||||
&Method::POST,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH,
|
||||
) => {
|
||||
let fully_qualified_url = format!("{}{}", llm_provider_url, path);
|
||||
llm_chat(
|
||||
req,
|
||||
router_service,
|
||||
fully_qualified_url,
|
||||
model_aliases,
|
||||
llm_providers,
|
||||
span_attributes,
|
||||
state_storage,
|
||||
filter_pipeline,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
(&Method::POST, "/function_calling") => {
|
||||
let fully_qualified_url =
|
||||
format!("{}{}", llm_provider_url, "/v1/chat/completions");
|
||||
function_calling_chat_handler(req, fully_qualified_url)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
(&Method::GET, "/v1/models" | "/agents/v1/models") => {
|
||||
Ok(list_models(llm_providers).await)
|
||||
}
|
||||
// hack for now to get openw-web-ui to work
|
||||
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => {
|
||||
let mut response = Response::new(empty());
|
||||
*response.status_mut() = StatusCode::NO_CONTENT;
|
||||
response
|
||||
.headers_mut()
|
||||
.insert("Allow", "GET, OPTIONS".parse().unwrap());
|
||||
response
|
||||
.headers_mut()
|
||||
.insert("Access-Control-Allow-Origin", "*".parse().unwrap());
|
||||
response.headers_mut().insert(
|
||||
"Access-Control-Allow-Headers",
|
||||
"Authorization, Content-Type".parse().unwrap(),
|
||||
);
|
||||
response.headers_mut().insert(
|
||||
"Access-Control-Allow-Methods",
|
||||
"GET, POST, OPTIONS".parse().unwrap(),
|
||||
);
|
||||
response
|
||||
.headers_mut()
|
||||
.insert("Content-Type", "application/json".parse().unwrap());
|
||||
Ok(Some(storage))
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
_ => {
|
||||
debug!(method = %req.method(), path = %path, "no route found");
|
||||
let mut not_found = Response::new(empty());
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
Ok(not_found)
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
// ---------------------------------------------------------------------------
|
||||
// Request routing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
debug!(peer = ?peer_addr, "accepted connection");
|
||||
if let Err(err) = http1::Builder::new()
|
||||
// .serve_connection(io, service_fn(chat_completion))
|
||||
.serve_connection(io, service)
|
||||
/// Route an incoming HTTP request to the appropriate handler.
|
||||
async fn route(
|
||||
req: Request<Incoming>,
|
||||
state: Arc<AppState>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
||||
let path = req.uri().path().to_string();
|
||||
|
||||
// --- Agent routes (/agents/...) ---
|
||||
if let Some(stripped) = path.strip_prefix("/agents") {
|
||||
if matches!(
|
||||
stripped,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
return agent_chat(req, Arc::clone(&state))
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Routing decision routes (/routing/...) ---
|
||||
if let Some(stripped) = path.strip_prefix("/routing") {
|
||||
let stripped = stripped.to_string();
|
||||
if matches!(
|
||||
stripped.as_str(),
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
return routing_decision(
|
||||
req,
|
||||
Arc::clone(&state.router_service),
|
||||
stripped,
|
||||
&state.span_attributes,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Standard routes ---
|
||||
match (req.method(), path.as_str()) {
|
||||
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||
llm_chat(req, Arc::clone(&state))
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
{
|
||||
warn!(error = ?err, "error serving connection");
|
||||
}
|
||||
});
|
||||
}
|
||||
(&Method::POST, "/function_calling") => {
|
||||
let url = format!("{}/v1/chat/completions", state.llm_provider_url);
|
||||
function_calling_chat_handler(req, url)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
(&Method::GET, "/v1/models" | "/agents/v1/models") => {
|
||||
Ok(list_models(Arc::clone(&state.llm_providers)).await)
|
||||
}
|
||||
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => cors_preflight(),
|
||||
_ => {
|
||||
debug!(method = %req.method(), path = %path, "no route found");
|
||||
let mut not_found = Response::new(empty());
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
Ok(not_found)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Server loop
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Accept connections and spawn a task per connection.
|
||||
///
|
||||
/// Listens for `SIGINT` / `ctrl-c` and shuts down gracefully, allowing
|
||||
/// in-flight connections to finish.
|
||||
async fn run_server(state: Arc<AppState>) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||
let listener = TcpListener::bind(&bind_address).await?;
|
||||
info!(address = %bind_address, "server listening");
|
||||
|
||||
let shutdown = tokio::signal::ctrl_c();
|
||||
tokio::pin!(shutdown);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
result = listener.accept() => {
|
||||
let (stream, _) = result?;
|
||||
let peer_addr = stream.peer_addr()?;
|
||||
let io = TokioIo::new(stream);
|
||||
let state = Arc::clone(&state);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
debug!(peer = ?peer_addr, "accepted connection");
|
||||
|
||||
let service = service_fn(move |req| {
|
||||
let state = Arc::clone(&state);
|
||||
async move { route(req, state).await }
|
||||
});
|
||||
|
||||
if let Err(err) = http1::Builder::new().serve_connection(io, service).await {
|
||||
warn!(error = ?err, "error serving connection");
|
||||
}
|
||||
});
|
||||
}
|
||||
_ = &mut shutdown => {
|
||||
info!("received shutdown signal, stopping server");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let config = load_config()?;
|
||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||
info!("loaded plano_config.yaml");
|
||||
let state = Arc::new(init_app_state(&config).await?);
|
||||
run_server(state).await
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue