diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 4bc9c4b7..6e47055d 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -8,13 +8,13 @@ from urllib.parse import urlparse from copy import deepcopy from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT - SUPPORTED_PROVIDERS_WITH_BASE_URL = [ "azure_openai", "ollama", "qwen", "amazon_bedrock", "arch", + "plano", ] SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [ @@ -368,29 +368,25 @@ def validate_and_render_schema(): llms_with_endpoint.append(model_provider) llms_with_endpoint_cluster_names.add(cluster_name) - if len(model_usage_name_keys) > 0: - routing_model_provider = config_yaml.get("routing", {}).get( - "model_provider", None + overrides_config = config_yaml.get("overrides", {}) + # Build lookup of model names (already prefix-stripped by config processing) + model_name_set = {mp.get("model") for mp in updated_model_providers} + + # Auto-add arch-router provider if routing preferences exist and no provider matches the router model + router_model = overrides_config.get("router_model", "Arch-Router") + # Strip provider prefix for comparison since config processing strips prefixes from model names + router_model_id = ( + router_model.split("/", 1)[1] if "/" in router_model else router_model + ) + if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set: + updated_model_providers.append( + { + "name": "arch-router", + "provider_interface": "arch", + "model": router_model_id, + "internal": True, + } ) - if ( - routing_model_provider - and routing_model_provider not in model_provider_name_set - ): - raise Exception( - f"Routing model_provider {routing_model_provider} is not defined in model_providers" - ) - if ( - routing_model_provider is None - and "arch-router" not in model_provider_name_set - ): - updated_model_providers.append( - { - "name": "arch-router", - "provider_interface": "arch", - "model": config_yaml.get("routing", {}).get("model", "Arch-Router"), - "internal": True, - } - ) # Always add arch-function model provider if not already defined if "arch-function" not in model_provider_name_set: @@ -403,26 +399,21 @@ def validate_and_render_schema(): } ) - orchestration_config = config_yaml.get("orchestration", {}) - orchestration_model_provider = orchestration_config.get("llm_provider", None) - - if ( - orchestration_model_provider - and orchestration_model_provider not in model_provider_name_set - ): - raise Exception( - f"Orchestration llm_provider {orchestration_model_provider} is not defined in model_providers" - ) - - if ( - orchestration_model_provider is None - and "plano-orchestrator" not in model_provider_name_set - ): + # Auto-add plano-orchestrator provider if no provider matches the orchestrator model + orchestrator_model = overrides_config.get( + "orchestrator_model", "Plano-Orchestrator" + ) + orchestrator_model_id = ( + orchestrator_model.split("/", 1)[1] + if "/" in orchestrator_model + else orchestrator_model + ) + if orchestrator_model_id not in model_name_set: updated_model_providers.append( { "name": "plano-orchestrator", "provider_interface": "arch", - "model": orchestration_config.get("model", "Plano-Orchestrator"), + "model": orchestrator_model_id, "internal": True, } ) diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index d2293650..00533596 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -174,6 +174,7 @@ properties: type: string enum: - arch + - plano - claude - deepseek - groq @@ -221,6 +222,7 @@ properties: type: string enum: - arch + - plano - claude - deepseek - groq @@ -271,6 +273,12 @@ properties: upstream_tls_ca_path: type: string description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'." + router_model: + type: string + description: "Model name for the LLM router (e.g., 'Arch-Router'). Must match a model in model_providers." + orchestrator_model: + type: string + description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers." system_prompt: type: string prompt_targets: @@ -408,22 +416,6 @@ properties: enum: - llm - prompt - routing: - type: object - properties: - llm_provider: - type: string - model: - type: string - additionalProperties: false - orchestration: - type: object - properties: - llm_provider: - type: string - model: - type: string - additionalProperties: false state_storage: type: object properties: diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 025ff545..18f01cab 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -90,16 +90,21 @@ async fn main() -> Result<(), Box> { env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string()); let listener = TcpListener::bind(bind_address).await?; - let routing_model_name: String = plano_config - .routing - .as_ref() - .and_then(|r| r.model.clone()) - .unwrap_or_else(|| DEFAULT_ROUTING_MODEL_NAME.to_string()); + let overrides = plano_config.overrides.clone().unwrap_or_default(); + + // Strip provider prefix (e.g. "arch/") to get the model ID used in upstream requests + let routing_model_name: String = overrides + .router_model + .as_deref() + .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m)) + .unwrap_or(DEFAULT_ROUTING_MODEL_NAME) + .to_string(); let routing_llm_provider = plano_config - .routing - .as_ref() - .and_then(|r| r.model_provider.clone()) + .model_providers + .iter() + .find(|p| p.model.as_deref() == Some(routing_model_name.as_str())) + .map(|p| p.name.clone()) .unwrap_or_else(|| DEFAULT_ROUTING_LLM_PROVIDER.to_string()); let router_service: Arc = Arc::new(RouterService::new( @@ -109,16 +114,19 @@ async fn main() -> Result<(), Box> { routing_llm_provider, )); - let orchestrator_model_name: String = plano_config - .orchestration - .as_ref() - .and_then(|o| o.model.clone()) - .unwrap_or_else(|| DEFAULT_ORCHESTRATOR_MODEL_NAME.to_string()); + // Strip provider prefix (e.g. "arch/") to get the model ID used in upstream requests + let orchestrator_model_name: String = overrides + .orchestrator_model + .as_deref() + .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m)) + .unwrap_or(DEFAULT_ORCHESTRATOR_MODEL_NAME) + .to_string(); let orchestrator_llm_provider: String = plano_config - .orchestration - .as_ref() - .and_then(|o| o.model_provider.clone()) + .model_providers + .iter() + .find(|p| p.model.as_deref() == Some(orchestrator_model_name.as_str())) + .map(|p| p.name.clone()) .unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string()); let orchestrator_service: Arc = Arc::new(OrchestratorService::new( diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 948526b2..2d7289c5 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -7,18 +7,6 @@ use crate::api::open_ai::{ ChatCompletionTool, FunctionDefinition, FunctionParameter, FunctionParameters, ParameterType, }; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Routing { - pub model_provider: Option, - pub model: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Orchestration { - pub model_provider: Option, - pub model: Option, -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModelAlias { pub target: String, @@ -78,8 +66,6 @@ pub struct Configuration { pub ratelimits: Option>, pub tracing: Option, pub mode: Option, - pub routing: Option, - pub orchestration: Option, pub agents: Option>, pub filters: Option>, pub listeners: Vec, @@ -91,6 +77,8 @@ pub struct Overrides { pub prompt_target_intent_matching_threshold: Option, pub optimize_context_window: Option, pub use_agent_orchestrator: Option, + pub router_model: Option, + pub orchestrator_model: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -244,6 +232,8 @@ pub enum LlmProviderType { Qwen, #[serde(rename = "amazon_bedrock")] AmazonBedrock, + #[serde(rename = "plano")] + Plano, } impl Display for LlmProviderType { @@ -264,6 +254,7 @@ impl Display for LlmProviderType { LlmProviderType::Zhipu => write!(f, "zhipu"), LlmProviderType::Qwen => write!(f, "qwen"), LlmProviderType::AmazonBedrock => write!(f, "amazon_bedrock"), + LlmProviderType::Plano => write!(f, "plano"), } } } @@ -272,7 +263,15 @@ impl LlmProviderType { /// Get the ProviderId for this LlmProviderType /// Used with the new function-based hermesllm API pub fn to_provider_id(&self) -> hermesllm::ProviderId { - hermesllm::ProviderId::try_from(self.to_string().as_str()) + // Plano provider uses the same interface as Arch + let provider_str = match self { + LlmProviderType::Plano => "arch", + other => { + return hermesllm::ProviderId::try_from(other.to_string().as_str()) + .expect("LlmProviderType should always map to a valid ProviderId") + } + }; + hermesllm::ProviderId::try_from(provider_str) .expect("LlmProviderType should always map to a valid ProviderId") } } diff --git a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml index babc9401..57a376e1 100644 --- a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml +++ b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml @@ -1,8 +1,7 @@ version: v0.3.0 -orchestration: - model: Plano-Orchestrator - llm_provider: plano-orchestrator +overrides: + orchestrator_model: arch/Plano-Orchestrator agents: - id: weather_agent @@ -11,8 +10,7 @@ agents: url: http://localhost:10520 model_providers: - - name: plano-orchestrator - model: Plano-Orchestrator + - model: arch/Plano-Orchestrator base_url: http://localhost:8000 - model: openai/gpt-5.2 diff --git a/demos/llm_routing/openclaw_routing/config.yaml b/demos/llm_routing/openclaw_routing/config.yaml index 3106b5dd..b8d183bc 100644 --- a/demos/llm_routing/openclaw_routing/config.yaml +++ b/demos/llm_routing/openclaw_routing/config.yaml @@ -1,8 +1,7 @@ version: v0.1.0 -routing: - model: Arch-Router - llm_provider: arch-router +overrides: + router_model: Arch-Router listeners: egress_traffic: diff --git a/demos/llm_routing/preference_based_routing/plano_config_local.yaml b/demos/llm_routing/preference_based_routing/plano_config_local.yaml index dbd287dd..6c2b375a 100644 --- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml +++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml @@ -1,8 +1,7 @@ version: v0.3.0 -routing: - model: Arch-Router - llm_provider: arch-router +overrides: + router_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M listeners: - type: model @@ -11,8 +10,7 @@ listeners: model_providers: - - name: arch-router - model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + - model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M base_url: http://localhost:11434 - model: openai/gpt-4o-mini diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 41c51b4a..e1f58a0d 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -253,13 +253,11 @@ Using Ollama (recommended for local development) .. code-block:: yaml - routing: - model: Arch-Router - llm_provider: arch-router + overrides: + router_model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M model_providers: - - name: arch-router - model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + - model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M base_url: http://localhost:11434 - model: openai/gpt-5.2 @@ -324,13 +322,11 @@ vLLM provides higher throughput and GPU optimizations suitable for production de .. code-block:: yaml - routing: - model: Arch-Router - llm_provider: arch-router + overrides: + router_model: Arch-Router model_providers: - - name: arch-router - model: Arch-Router + - model: Arch-Router base_url: http://:10000 - model: openai/gpt-5.2 diff --git a/docs/source/guides/orchestration.rst b/docs/source/guides/orchestration.rst index 1d300f38..4b508658 100644 --- a/docs/source/guides/orchestration.rst +++ b/docs/source/guides/orchestration.rst @@ -401,13 +401,11 @@ Using vLLM .. code-block:: yaml - orchestration: - model: Plano-Orchestrator - llm_provider: plano-orchestrator + overrides: + orchestrator_model: arch/Plano-Orchestrator model_providers: - - name: plano-orchestrator - model: Plano-Orchestrator + - model: arch/Plano-Orchestrator base_url: http://:8000 5. **Verify the server is running**