mirror of
https://github.com/katanemo/plano.git
synced 2026-05-10 00:02:43 +02:00
support configurable orchestrator model via orchestration config section
This commit is contained in:
parent
5400b0a2fa
commit
8edf686665
8 changed files with 60 additions and 11 deletions
|
|
@ -403,12 +403,26 @@ def validate_and_render_schema():
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if "plano-orchestrator" not in model_provider_name_set:
|
orchestration_config = config_yaml.get("orchestration", {})
|
||||||
|
orchestration_model_provider = orchestration_config.get("llm_provider", None)
|
||||||
|
|
||||||
|
if (
|
||||||
|
orchestration_model_provider
|
||||||
|
and orchestration_model_provider not in model_provider_name_set
|
||||||
|
):
|
||||||
|
raise Exception(
|
||||||
|
f"Orchestration llm_provider {orchestration_model_provider} is not defined in model_providers"
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
orchestration_model_provider is None
|
||||||
|
and "plano-orchestrator" not in model_provider_name_set
|
||||||
|
):
|
||||||
updated_model_providers.append(
|
updated_model_providers.append(
|
||||||
{
|
{
|
||||||
"name": "plano-orchestrator",
|
"name": "plano-orchestrator",
|
||||||
"provider_interface": "arch",
|
"provider_interface": "arch",
|
||||||
"model": "Plano-Orchestrator",
|
"model": orchestration_config.get("model", "Plano-Orchestrator"),
|
||||||
"internal": True,
|
"internal": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -416,6 +416,14 @@ properties:
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
orchestration:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
llm_provider:
|
||||||
|
type: string
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
state_storage:
|
state_storage:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
||||||
|
|
@ -178,6 +178,7 @@ mod tests {
|
||||||
Arc::new(OrchestratorService::new(
|
Arc::new(OrchestratorService::new(
|
||||||
"http://localhost:8080".to_string(),
|
"http://localhost:8080".to_string(),
|
||||||
"test-model".to_string(),
|
"test-model".to_string(),
|
||||||
|
"plano-orchestrator".to_string(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ mod tests {
|
||||||
Arc::new(OrchestratorService::new(
|
Arc::new(OrchestratorService::new(
|
||||||
"http://localhost:8080".to_string(),
|
"http://localhost:8080".to_string(),
|
||||||
"test-model".to_string(),
|
"test-model".to_string(),
|
||||||
|
"plano-orchestrator".to_string(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,7 @@ use brightstaff::state::StateStorage;
|
||||||
use brightstaff::utils::tracing::init_tracer;
|
use brightstaff::utils::tracing::init_tracer;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use common::configuration::{Agent, Configuration};
|
use common::configuration::{Agent, Configuration};
|
||||||
use common::consts::{
|
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
|
||||||
CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
|
|
||||||
};
|
|
||||||
use common::llm_providers::LlmProviders;
|
use common::llm_providers::LlmProviders;
|
||||||
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
||||||
use hyper::body::Incoming;
|
use hyper::body::Incoming;
|
||||||
|
|
@ -35,6 +33,8 @@ pub mod router;
|
||||||
const BIND_ADDRESS: &str = "0.0.0.0:9091";
|
const BIND_ADDRESS: &str = "0.0.0.0:9091";
|
||||||
const DEFAULT_ROUTING_LLM_PROVIDER: &str = "arch-router";
|
const DEFAULT_ROUTING_LLM_PROVIDER: &str = "arch-router";
|
||||||
const DEFAULT_ROUTING_MODEL_NAME: &str = "Arch-Router";
|
const DEFAULT_ROUTING_MODEL_NAME: &str = "Arch-Router";
|
||||||
|
const DEFAULT_ORCHESTRATOR_LLM_PROVIDER: &str = "plano-orchestrator";
|
||||||
|
const DEFAULT_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
|
||||||
|
|
||||||
// Utility function to extract the context from the incoming request headers
|
// Utility function to extract the context from the incoming request headers
|
||||||
fn extract_context_from_request(req: &Request<Incoming>) -> Context {
|
fn extract_context_from_request(req: &Request<Incoming>) -> Context {
|
||||||
|
|
@ -109,9 +109,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
routing_llm_provider,
|
routing_llm_provider,
|
||||||
));
|
));
|
||||||
|
|
||||||
|
let orchestrator_model_name: String = plano_config
|
||||||
|
.orchestration
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|o| o.model.clone())
|
||||||
|
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_MODEL_NAME.to_string());
|
||||||
|
|
||||||
|
let orchestrator_llm_provider: String = plano_config
|
||||||
|
.orchestration
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|o| o.model_provider.clone())
|
||||||
|
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
|
||||||
|
|
||||||
let orchestrator_service: Arc<OrchestratorService> = Arc::new(OrchestratorService::new(
|
let orchestrator_service: Arc<OrchestratorService> = Arc::new(OrchestratorService::new(
|
||||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||||
PLANO_ORCHESTRATOR_MODEL_NAME.to_string(),
|
orchestrator_model_name,
|
||||||
|
orchestrator_llm_provider,
|
||||||
));
|
));
|
||||||
|
|
||||||
let model_aliases = Arc::new(plano_config.model_aliases.clone());
|
let model_aliases = Arc::new(plano_config.model_aliases.clone());
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
use common::{
|
use common::{
|
||||||
configuration::{AgentUsagePreference, OrchestrationPreference},
|
configuration::{AgentUsagePreference, OrchestrationPreference},
|
||||||
consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
|
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
|
||||||
};
|
};
|
||||||
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
|
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
|
||||||
use hyper::header;
|
use hyper::header;
|
||||||
|
|
@ -19,6 +19,7 @@ pub struct OrchestratorService {
|
||||||
orchestrator_url: String,
|
orchestrator_url: String,
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
orchestrator_model: Arc<dyn OrchestratorModel>,
|
orchestrator_model: Arc<dyn OrchestratorModel>,
|
||||||
|
orchestrator_provider_name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
|
|
@ -36,7 +37,11 @@ pub enum OrchestrationError {
|
||||||
pub type Result<T> = std::result::Result<T, OrchestrationError>;
|
pub type Result<T> = std::result::Result<T, OrchestrationError>;
|
||||||
|
|
||||||
impl OrchestratorService {
|
impl OrchestratorService {
|
||||||
pub fn new(orchestrator_url: String, orchestration_model_name: String) -> Self {
|
pub fn new(
|
||||||
|
orchestrator_url: String,
|
||||||
|
orchestration_model_name: String,
|
||||||
|
orchestrator_provider_name: String,
|
||||||
|
) -> Self {
|
||||||
// Empty agent orchestrations - will be provided via usage_preferences in requests
|
// Empty agent orchestrations - will be provided via usage_preferences in requests
|
||||||
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
|
let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
|
||||||
|
|
||||||
|
|
@ -50,6 +55,7 @@ impl OrchestratorService {
|
||||||
orchestrator_url,
|
orchestrator_url,
|
||||||
client: reqwest::Client::new(),
|
client: reqwest::Client::new(),
|
||||||
orchestrator_model,
|
orchestrator_model,
|
||||||
|
orchestrator_provider_name,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -91,7 +97,7 @@ impl OrchestratorService {
|
||||||
|
|
||||||
orchestration_request_headers.insert(
|
orchestration_request_headers.insert(
|
||||||
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
|
||||||
header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
|
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Inject OpenTelemetry trace context from current span
|
// Inject OpenTelemetry trace context from current span
|
||||||
|
|
@ -110,7 +116,7 @@ impl OrchestratorService {
|
||||||
|
|
||||||
orchestration_request_headers.insert(
|
orchestration_request_headers.insert(
|
||||||
header::HeaderName::from_static("model"),
|
header::HeaderName::from_static("model"),
|
||||||
header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME),
|
header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,12 @@ pub struct Routing {
|
||||||
pub model: Option<String>,
|
pub model: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Orchestration {
|
||||||
|
pub model_provider: Option<String>,
|
||||||
|
pub model: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct ModelAlias {
|
pub struct ModelAlias {
|
||||||
pub target: String,
|
pub target: String,
|
||||||
|
|
@ -73,6 +79,7 @@ pub struct Configuration {
|
||||||
pub tracing: Option<Tracing>,
|
pub tracing: Option<Tracing>,
|
||||||
pub mode: Option<GatewayMode>,
|
pub mode: Option<GatewayMode>,
|
||||||
pub routing: Option<Routing>,
|
pub routing: Option<Routing>,
|
||||||
|
pub orchestration: Option<Orchestration>,
|
||||||
pub agents: Option<Vec<Agent>>,
|
pub agents: Option<Vec<Agent>>,
|
||||||
pub filters: Option<Vec<Agent>>,
|
pub filters: Option<Vec<Agent>>,
|
||||||
pub listeners: Vec<Listener>,
|
pub listeners: Vec<Listener>,
|
||||||
|
|
|
||||||
|
|
@ -33,5 +33,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
|
||||||
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
|
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
|
||||||
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
|
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
|
||||||
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
|
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
|
||||||
pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
|
|
||||||
pub const ARCH_FC_CLUSTER: &str = "arch";
|
pub const ARCH_FC_CLUSTER: &str = "arch";
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue