From bc059aed4d80c3a0d6123bf7a506da3dc1d7f416 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Sun, 15 Mar 2026 09:36:11 -0700 Subject: [PATCH] Unified overrides for custom router and orchestrator models (#820) * support configurable orchestrator model via orchestration config section * add self-hosting docs and demo for Plano-Orchestrator * list all Plano-Orchestrator model variants in docs * use overrides for custom routing and orchestration model * update docs * update orchestrator model name * rename arch provider to plano, use llm_routing_model and agent_orchestration_model * regenerate rendered config reference --- cli/planoai/config_generator.py | 62 +++++++------- config/envoy.template.yaml | 4 +- config/plano_config_schema.yaml | 18 ++-- .../src/handlers/agent_selector.rs | 1 + .../src/handlers/integration_tests.rs | 1 + crates/brightstaff/src/main.rs | 45 +++++++--- .../src/router/plano_orchestrator.rs | 18 ++-- crates/common/src/configuration.rs | 19 ++--- crates/common/src/consts.rs | 3 +- crates/hermesllm/src/lib.rs | 2 +- crates/hermesllm/src/providers/id.rs | 10 +-- crates/llm_gateway/src/stream_context.rs | 2 +- .../travel_agents/README.md | 36 ++++++++ .../config_local_orchestrator.yaml | 66 +++++++++++++++ .../travel_agents/run_demo.sh | 9 +- .../llm_routing/openclaw_routing/config.yaml | 5 +- .../plano_config_local.yaml | 8 +- docs/source/guides/llm_router.rst | 16 ++-- docs/source/guides/orchestration.rst | 84 +++++++++++++++++++ .../plano_config_full_reference_rendered.yaml | 6 +- 20 files changed, 312 insertions(+), 103 deletions(-) create mode 100644 demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 522968c9..c4e5831b 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -8,13 +8,12 @@ from urllib.parse import urlparse from copy import deepcopy from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT - 
SUPPORTED_PROVIDERS_WITH_BASE_URL = [ "azure_openai", "ollama", "qwen", "amazon_bedrock", - "arch", + "plano", ] SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [ @@ -368,47 +367,52 @@ def validate_and_render_schema(): llms_with_endpoint.append(model_provider) llms_with_endpoint_cluster_names.add(cluster_name) - if len(model_usage_name_keys) > 0: - routing_model_provider = config_yaml.get("routing", {}).get( - "model_provider", None + overrides_config = config_yaml.get("overrides", {}) + # Build lookup of model names (already prefix-stripped by config processing) + model_name_set = {mp.get("model") for mp in updated_model_providers} + + # Auto-add arch-router provider if routing preferences exist and no provider matches the router model + router_model = overrides_config.get("llm_routing_model", "Arch-Router") + # Strip provider prefix for comparison since config processing strips prefixes from model names + router_model_id = ( + router_model.split("/", 1)[1] if "/" in router_model else router_model + ) + if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set: + updated_model_providers.append( + { + "name": "arch-router", + "provider_interface": "plano", + "model": router_model_id, + "internal": True, + } ) - if ( - routing_model_provider - and routing_model_provider not in model_provider_name_set - ): - raise Exception( - f"Routing model_provider {routing_model_provider} is not defined in model_providers" - ) - if ( - routing_model_provider is None - and "arch-router" not in model_provider_name_set - ): - updated_model_providers.append( - { - "name": "arch-router", - "provider_interface": "arch", - "model": config_yaml.get("routing", {}).get("model", "Arch-Router"), - "internal": True, - } - ) # Always add arch-function model provider if not already defined if "arch-function" not in model_provider_name_set: updated_model_providers.append( { "name": "arch-function", - "provider_interface": "arch", + "provider_interface": "plano", "model": 
"Arch-Function", "internal": True, } ) - if "plano-orchestrator" not in model_provider_name_set: + # Auto-add plano-orchestrator provider if no provider matches the orchestrator model + orchestrator_model = overrides_config.get( + "agent_orchestration_model", "Plano-Orchestrator" + ) + orchestrator_model_id = ( + orchestrator_model.split("/", 1)[1] + if "/" in orchestrator_model + else orchestrator_model + ) + if orchestrator_model_id not in model_name_set: updated_model_providers.append( { - "name": "plano-orchestrator", - "provider_interface": "arch", - "model": "Plano-Orchestrator", + "name": "plano/orchestrator", + "provider_interface": "plano", + "model": orchestrator_model_id, "internal": True, } ) diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index a780c3f1..c2dd5ed0 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -594,13 +594,13 @@ static_resources: clusters: - - name: arch + - name: plano connect_timeout: {{ upstream_connect_timeout | default('5s') }} type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN load_assignment: - cluster_name: arch + cluster_name: plano endpoints: - lb_endpoints: - endpoint: diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index b63cb824..e204e298 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -173,7 +173,7 @@ properties: provider_interface: type: string enum: - - arch + - plano - claude - deepseek - groq @@ -220,7 +220,7 @@ properties: provider_interface: type: string enum: - - arch + - plano - claude - deepseek - groq @@ -271,6 +271,12 @@ properties: upstream_tls_ca_path: type: string description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'." + llm_routing_model: + type: string + description: "Model name for the LLM router (e.g., 'Arch-Router'). Must match a model in model_providers." 
+ agent_orchestration_model: + type: string + description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers." system_prompt: type: string prompt_targets: @@ -408,14 +414,6 @@ properties: enum: - llm - prompt - routing: - type: object - properties: - llm_provider: - type: string - model: - type: string - additionalProperties: false state_storage: type: object properties: diff --git a/crates/brightstaff/src/handlers/agent_selector.rs b/crates/brightstaff/src/handlers/agent_selector.rs index faa734ee..a1b38b2c 100644 --- a/crates/brightstaff/src/handlers/agent_selector.rs +++ b/crates/brightstaff/src/handlers/agent_selector.rs @@ -178,6 +178,7 @@ mod tests { Arc::new(OrchestratorService::new( "http://localhost:8080".to_string(), "test-model".to_string(), + "plano-orchestrator".to_string(), )) } diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs index 70b2999d..b440e198 100644 --- a/crates/brightstaff/src/handlers/integration_tests.rs +++ b/crates/brightstaff/src/handlers/integration_tests.rs @@ -23,6 +23,7 @@ mod tests { Arc::new(OrchestratorService::new( "http://localhost:8080".to_string(), "test-model".to_string(), + "plano-orchestrator".to_string(), )) } diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 51c9127f..b9faafbf 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -11,9 +11,7 @@ use brightstaff::state::StateStorage; use brightstaff::utils::tracing::init_tracer; use bytes::Bytes; use common::configuration::{Agent, Configuration}; -use common::consts::{ - CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME, -}; +use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH}; use common::llm_providers::LlmProviders; use http_body_util::{combinators::BoxBody, BodyExt, Empty}; use hyper::body::Incoming; @@ 
-35,6 +33,8 @@ pub mod router; const BIND_ADDRESS: &str = "0.0.0.0:9091"; const DEFAULT_ROUTING_LLM_PROVIDER: &str = "arch-router"; const DEFAULT_ROUTING_MODEL_NAME: &str = "Arch-Router"; +const DEFAULT_ORCHESTRATOR_LLM_PROVIDER: &str = "plano-orchestrator"; +const DEFAULT_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator"; // Utility function to extract the context from the incoming request headers fn extract_context_from_request(req: &Request) -> Context { @@ -90,16 +90,21 @@ async fn main() -> Result<(), Box> { env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string()); let listener = TcpListener::bind(bind_address).await?; - let routing_model_name: String = plano_config - .routing - .as_ref() - .and_then(|r| r.model.clone()) - .unwrap_or_else(|| DEFAULT_ROUTING_MODEL_NAME.to_string()); + let overrides = plano_config.overrides.clone().unwrap_or_default(); + + // Strip provider prefix (e.g. "plano/") to get the model ID used in upstream requests + let routing_model_name: String = overrides + .llm_routing_model + .as_deref() + .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m)) + .unwrap_or(DEFAULT_ROUTING_MODEL_NAME) + .to_string(); let routing_llm_provider = plano_config - .routing - .as_ref() - .and_then(|r| r.model_provider.clone()) + .model_providers + .iter() + .find(|p| p.model.as_deref() == Some(routing_model_name.as_str())) + .map(|p| p.name.clone()) .unwrap_or_else(|| DEFAULT_ROUTING_LLM_PROVIDER.to_string()); let router_service: Arc = Arc::new(RouterService::new( @@ -109,9 +114,25 @@ async fn main() -> Result<(), Box> { routing_llm_provider, )); + // Strip provider prefix (e.g. 
"plano/") to get the model ID used in upstream requests + let orchestrator_model_name: String = overrides + .agent_orchestration_model + .as_deref() + .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m)) + .unwrap_or(DEFAULT_ORCHESTRATOR_MODEL_NAME) + .to_string(); + + let orchestrator_llm_provider: String = plano_config + .model_providers + .iter() + .find(|p| p.model.as_deref() == Some(orchestrator_model_name.as_str())) + .map(|p| p.name.clone()) + .unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string()); + let orchestrator_service: Arc = Arc::new(OrchestratorService::new( format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"), - PLANO_ORCHESTRATOR_MODEL_NAME.to_string(), + orchestrator_model_name, + orchestrator_llm_provider, )); let model_aliases = Arc::new(plano_config.model_aliases.clone()); diff --git a/crates/brightstaff/src/router/plano_orchestrator.rs b/crates/brightstaff/src/router/plano_orchestrator.rs index cf2688b9..12140570 100644 --- a/crates/brightstaff/src/router/plano_orchestrator.rs +++ b/crates/brightstaff/src/router/plano_orchestrator.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc}; use common::{ configuration::{AgentUsagePreference, OrchestrationPreference}, - consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER}, + consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER}, }; use hermesllm::apis::openai::{ChatCompletionsResponse, Message}; use hyper::header; @@ -19,6 +19,7 @@ pub struct OrchestratorService { orchestrator_url: String, client: reqwest::Client, orchestrator_model: Arc, + orchestrator_provider_name: String, } #[derive(Debug, Error)] @@ -36,7 +37,11 @@ pub enum OrchestrationError { pub type Result = std::result::Result; impl OrchestratorService { - pub fn new(orchestrator_url: String, orchestration_model_name: String) -> Self { + pub fn new( + orchestrator_url: String, + orchestration_model_name: String, + orchestrator_provider_name: String, + ) -> Self { // Empty agent 
orchestrations - will be provided via usage_preferences in requests let agent_orchestrations: HashMap> = HashMap::new(); @@ -50,6 +55,7 @@ impl OrchestratorService { orchestrator_url, client: reqwest::Client::new(), orchestrator_model, + orchestrator_provider_name, } } @@ -75,12 +81,12 @@ impl OrchestratorService { debug!( model = %self.orchestrator_model.get_model_name(), endpoint = %self.orchestrator_url, - "sending request to arch-orchestrator" + "sending request to plano-orchestrator" ); debug!( body = %serde_json::to_string(&orchestrator_request).unwrap(), - "arch orchestrator request" + "plano orchestrator request" ); let mut orchestration_request_headers = header::HeaderMap::new(); @@ -91,7 +97,7 @@ impl OrchestratorService { orchestration_request_headers.insert( header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER), - header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(), + header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(), ); // Inject OpenTelemetry trace context from current span @@ -110,7 +116,7 @@ impl OrchestratorService { orchestration_request_headers.insert( header::HeaderName::from_static("model"), - header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME), + header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(), ); let start_time = std::time::Instant::now(); diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index f4e2b7b4..12c7d73f 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -7,12 +7,6 @@ use crate::api::open_ai::{ ChatCompletionTool, FunctionDefinition, FunctionParameter, FunctionParameters, ParameterType, }; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Routing { - pub model_provider: Option, - pub model: Option, -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModelAlias { pub target: String, @@ -72,7 +66,6 @@ pub struct Configuration { pub ratelimits: Option>, pub 
tracing: Option, pub mode: Option, - pub routing: Option, pub agents: Option>, pub filters: Option>, pub listeners: Vec, @@ -84,6 +77,8 @@ pub struct Overrides { pub prompt_target_intent_matching_threshold: Option, pub optimize_context_window: Option, pub use_agent_orchestrator: Option, + pub llm_routing_model: Option, + pub agent_orchestration_model: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -207,8 +202,6 @@ pub struct EmbeddingProviver { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum LlmProviderType { - #[serde(rename = "arch")] - Arch, #[serde(rename = "anthropic")] Anthropic, #[serde(rename = "deepseek")] @@ -237,12 +230,13 @@ pub enum LlmProviderType { Qwen, #[serde(rename = "amazon_bedrock")] AmazonBedrock, + #[serde(rename = "plano")] + Plano, } impl Display for LlmProviderType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - LlmProviderType::Arch => write!(f, "arch"), LlmProviderType::Anthropic => write!(f, "anthropic"), LlmProviderType::Deepseek => write!(f, "deepseek"), LlmProviderType::Groq => write!(f, "groq"), @@ -257,6 +251,7 @@ impl Display for LlmProviderType { LlmProviderType::Zhipu => write!(f, "zhipu"), LlmProviderType::Qwen => write!(f, "qwen"), LlmProviderType::AmazonBedrock => write!(f, "amazon_bedrock"), + LlmProviderType::Plano => write!(f, "plano"), } } } @@ -591,14 +586,14 @@ mod test { }, LlmProvider { name: "arch-router".to_string(), - provider_interface: LlmProviderType::Arch, + provider_interface: LlmProviderType::Plano, model: Some("Arch-Router".to_string()), internal: Some(true), ..Default::default() }, LlmProvider { name: "plano-orchestrator".to_string(), - provider_interface: LlmProviderType::Arch, + provider_interface: LlmProviderType::Plano, model: Some("Plano-Orchestrator".to_string()), internal: Some(true), ..Default::default() diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index cafc8e80..dbd0bc41 100644 --- 
a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -33,5 +33,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http"; pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route"; pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries"; pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff"; -pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator"; -pub const ARCH_FC_CLUSTER: &str = "arch"; +pub const PLANO_FC_CLUSTER: &str = "plano"; diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs index 997fc72a..3b9611e0 100644 --- a/crates/hermesllm/src/lib.rs +++ b/crates/hermesllm/src/lib.rs @@ -35,7 +35,7 @@ mod tests { ProviderId::Mistral ); assert_eq!(ProviderId::try_from("groq").unwrap(), ProviderId::Groq); - assert_eq!(ProviderId::try_from("arch").unwrap(), ProviderId::Arch); + assert_eq!(ProviderId::try_from("plano").unwrap(), ProviderId::Plano); // Test aliases assert_eq!(ProviderId::try_from("google").unwrap(), ProviderId::Gemini); diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index 11008711..9f5f42c9 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -34,7 +34,7 @@ pub enum ProviderId { Gemini, Anthropic, GitHub, - Arch, + Plano, AzureOpenAI, XAI, TogetherAI, @@ -58,7 +58,7 @@ impl TryFrom<&str> for ProviderId { "google" => Ok(ProviderId::Gemini), // alias "anthropic" => Ok(ProviderId::Anthropic), "github" => Ok(ProviderId::GitHub), - "arch" => Ok(ProviderId::Arch), + "plano" => Ok(ProviderId::Plano), "azure_openai" => Ok(ProviderId::AzureOpenAI), "xai" => Ok(ProviderId::XAI), "together_ai" => Ok(ProviderId::TogetherAI), @@ -135,7 +135,7 @@ impl ProviderId { | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek - | ProviderId::Arch + | ProviderId::Plano | ProviderId::Gemini | ProviderId::GitHub | ProviderId::AzureOpenAI @@ -153,7 +153,7 @@ impl ProviderId { | ProviderId::Groq | ProviderId::Mistral | 
ProviderId::Deepseek - | ProviderId::Arch + | ProviderId::Plano | ProviderId::Gemini | ProviderId::GitHub | ProviderId::AzureOpenAI @@ -219,7 +219,7 @@ impl Display for ProviderId { ProviderId::Gemini => write!(f, "Gemini"), ProviderId::Anthropic => write!(f, "Anthropic"), ProviderId::GitHub => write!(f, "GitHub"), - ProviderId::Arch => write!(f, "Arch"), + ProviderId::Plano => write!(f, "Plano"), ProviderId::AzureOpenAI => write!(f, "azure_openai"), ProviderId::XAI => write!(f, "xai"), ProviderId::TogetherAI => write!(f, "together_ai"), diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 7a353bcb..f62631fa 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -873,7 +873,7 @@ impl HttpContext for StreamContext { // ensure that the provider has an endpoint if the access key is missing else return a bad request if self.llm_provider.as_ref().unwrap().endpoint.is_none() && self.llm_provider.as_ref().unwrap().provider_interface - != LlmProviderType::Arch + != LlmProviderType::Plano { self.send_server_error(error, Some(StatusCode::BAD_REQUEST)); } diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md index 7886539d..239ba938 100644 --- a/demos/agent_orchestration/travel_agents/README.md +++ b/demos/agent_orchestration/travel_agents/README.md @@ -123,6 +123,42 @@ Each agent: Both agents run as native local processes and communicate with Plano running natively on the host. +## Running with local Plano-Orchestrator (via vLLM) + +By default, Plano uses a hosted Plano-Orchestrator endpoint. To self-host the orchestrator model locally using vLLM on a server with an NVIDIA GPU: + +1. Install vLLM and download the model: +```bash +pip install vllm +``` + +2. 
Start the vLLM server with the 4B model: +```bash +vllm serve katanemo/Plano-Orchestrator-4B \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.3 \ + --tokenizer katanemo/Plano-Orchestrator-4B \ + --chat-template chat_template.jinja \ + --served-model-name katanemo/Plano-Orchestrator-4B \ + --enable-prefix-caching +``` + +3. Start the demo with the local orchestrator config: +```bash +./run_demo.sh --local-orchestrator +``` + +4. Test with curl: +```bash +curl -X POST http://localhost:8001/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}' +``` + +You should see Plano use your local orchestrator to route the request to the weather agent. + ## Observability This demo includes full OpenTelemetry (OTel) compatible distributed tracing to monitor and debug agent interactions: diff --git a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml new file mode 100644 index 00000000..1d3a0be8 --- /dev/null +++ b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml @@ -0,0 +1,66 @@ +version: v0.3.0 + +overrides: + agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B + +agents: + - id: weather_agent + url: http://localhost:10510 + - id: flight_agent + url: http://localhost:10520 + +model_providers: + - model: plano/katanemo/Plano-Orchestrator-4B + base_url: http://localhost:8000 + + - model: openai/gpt-5.2 + access_key: $OPENAI_API_KEY + default: true + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY # smaller, faster, cheaper model for extracting entities like location + +listeners: + - type: agent + name: travel_booking_service + port: 8001 + router: plano_orchestrator_v1 + agents: + - id: weather_agent + description: | + + WeatherAgent is a specialized AI assistant for real-time weather 
information and forecasts. It provides accurate weather data for any city worldwide using the Open-Meteo API, helping travelers plan their trips with up-to-date weather conditions. + + Capabilities: + * Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed) + * Provides current temperature + * Provides multi-day forecasts + * Provides weather conditions + * Provides sunrise/sunset times + * Provides detailed weather information + * Understands conversation context to resolve location references from previous messages + * Handles weather-related questions including "What's the weather in [city]?", "What's the forecast for [city]?", "How's the weather in [city]?" + * When queries include both weather and other travel questions (e.g., flights, currency), this agent answers ONLY the weather part + + - id: flight_agent + description: | + + FlightAgent is an AI-powered tool specialized in providing live flight information between airports. It leverages the FlightAware AeroAPI to deliver real-time flight status, gate information, and delay updates. + + Capabilities: + * Get live flight information between airports using FlightAware AeroAPI + * Shows real-time flight status + * Shows scheduled/estimated/actual departure and arrival times + * Shows gate and terminal information + * Shows delays + * Shows aircraft type + * Shows flight status + * Automatically resolves city names to airport codes (IATA/ICAO) + * Understands conversation context to infer origin/destination from follow-up questions + * Handles flight-related questions including "What flights go from [city] to [city]?", "Do flights go to [city]?", "Are there direct flights from [city]?" 
+ * When queries include both flight and other travel questions (e.g., weather, currency), this agent answers ONLY the flight part + +tracing: + random_sampling: 100 + span_attributes: + header_prefixes: + - x-acme- diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh index 643a0aa2..35166b85 100755 --- a/demos/agent_orchestration/travel_agents/run_demo.sh +++ b/demos/agent_orchestration/travel_agents/run_demo.sh @@ -31,8 +31,13 @@ start_demo() { fi # Step 4: Start Plano - echo "Starting Plano with config.yaml..." - planoai up config.yaml + PLANO_CONFIG="config.yaml" + if [ "$1" == "--local-orchestrator" ]; then + PLANO_CONFIG="config_local_orchestrator.yaml" + echo "Using local orchestrator config..." + fi + echo "Starting Plano with $PLANO_CONFIG..." + planoai up "$PLANO_CONFIG" # Step 5: Start agents natively echo "Starting agents..." diff --git a/demos/llm_routing/openclaw_routing/config.yaml b/demos/llm_routing/openclaw_routing/config.yaml index 3106b5dd..9690e747 100644 --- a/demos/llm_routing/openclaw_routing/config.yaml +++ b/demos/llm_routing/openclaw_routing/config.yaml @@ -1,8 +1,7 @@ version: v0.1.0 -routing: - model: Arch-Router - llm_provider: arch-router +overrides: + llm_routing_model: Arch-Router listeners: egress_traffic: diff --git a/demos/llm_routing/preference_based_routing/plano_config_local.yaml b/demos/llm_routing/preference_based_routing/plano_config_local.yaml index dbd287dd..01adb097 100644 --- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml +++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml @@ -1,8 +1,7 @@ version: v0.3.0 -routing: - model: Arch-Router - llm_provider: arch-router +overrides: + llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M listeners: - type: model @@ -11,8 +10,7 @@ listeners: model_providers: - - name: arch-router - model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + - model: 
plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M base_url: http://localhost:11434 - model: openai/gpt-4o-mini diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 41c51b4a..0073a664 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -253,13 +253,11 @@ Using Ollama (recommended for local development) .. code-block:: yaml - routing: - model: Arch-Router - llm_provider: arch-router + overrides: + llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M model_providers: - - name: arch-router - model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + - model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M base_url: http://localhost:11434 - model: openai/gpt-5.2 @@ -324,13 +322,11 @@ vLLM provides higher throughput and GPU optimizations suitable for production de .. code-block:: yaml - routing: - model: Arch-Router - llm_provider: arch-router + overrides: + llm_routing_model: plano/Arch-Router model_providers: - - name: arch-router - model: Arch-Router + - model: plano/Arch-Router base_url: http://<your-server-ip>:10000 - model: openai/gpt-5.2 diff --git a/docs/source/guides/orchestration.rst b/docs/source/guides/orchestration.rst index 3170b65f..1a153e83 100644 --- a/docs/source/guides/orchestration.rst +++ b/docs/source/guides/orchestration.rst @@ -335,6 +335,90 @@ Combine RAG agents for documentation lookup with specialized troubleshooting age - id: troubleshoot_agent description: Diagnoses and resolves technical issues step by step +Self-hosting Plano-Orchestrator +------------------------------- + +By default, Plano uses a hosted Plano-Orchestrator endpoint. To self-host the orchestrator model, you can serve it using **vLLM** on a server with an NVIDIA GPU. + +.. note:: + vLLM requires a Linux server with an NVIDIA GPU (CUDA). For local development on macOS, a GGUF version for Ollama is coming soon. 
+ +The following model variants are available on HuggingFace: + +* `Plano-Orchestrator-4B <https://huggingface.co/katanemo/Plano-Orchestrator-4B>`_ — lighter model, suitable for development and testing +* `Plano-Orchestrator-4B-FP8 <https://huggingface.co/katanemo/Plano-Orchestrator-4B-FP8>`_ — FP8 quantized 4B model, lower memory usage +* `Plano-Orchestrator-30B-A3B <https://huggingface.co/katanemo/Plano-Orchestrator-30B-A3B>`_ — full-size model for production +* `Plano-Orchestrator-30B-A3B-FP8 <https://huggingface.co/katanemo/Plano-Orchestrator-30B-A3B-FP8>`_ — FP8 quantized 30B model, recommended for production deployments + +Using vLLM +~~~~~~~~~~ + +1. **Install vLLM** + + .. code-block:: bash + + pip install vllm + +2. **Download the model and chat template** + + .. code-block:: bash + + pip install huggingface_hub + huggingface-cli download katanemo/Plano-Orchestrator-4B + +3. **Start the vLLM server** + + For the 4B model (development): + + .. code-block:: bash + + vllm serve katanemo/Plano-Orchestrator-4B \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.3 \ + --tokenizer katanemo/Plano-Orchestrator-4B \ + --chat-template chat_template.jinja \ + --served-model-name katanemo/Plano-Orchestrator-4B \ + --enable-prefix-caching + + For the 30B-A3B-FP8 model (production): + + .. code-block:: bash + + vllm serve katanemo/Plano-Orchestrator-30B-A3B-FP8 \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 1 \ + --gpu-memory-utilization 0.9 \ + --tokenizer katanemo/Plano-Orchestrator-30B-A3B-FP8 \ + --chat-template chat_template.jinja \ + --max-model-len 32768 \ + --served-model-name katanemo/Plano-Orchestrator-30B-A3B-FP8 \ + --enable-prefix-caching + +4. **Configure Plano to use the local orchestrator** + + Use the model name matching your ``--served-model-name``: + + .. 
code-block:: yaml + + overrides: + agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B + + model_providers: + - model: katanemo/Plano-Orchestrator-4B + provider_interface: plano + base_url: http://<your-server-ip>:8000 + +5. **Verify the server is running** + + .. 
code-block:: bash + + curl http://localhost:8000/health + curl http://localhost:8000/v1/models + + Next Steps ---------- diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml index 9717b53a..64ee1f91 100644 --- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml @@ -107,11 +107,11 @@ model_providers: - internal: true model: Arch-Function name: arch-function - provider_interface: arch + provider_interface: plano - internal: true model: Plano-Orchestrator - name: plano-orchestrator - provider_interface: arch + name: plano/orchestrator + provider_interface: plano prompt_targets: - description: Get current weather at a location. endpoint: