diff --git a/.claude/skills/release/SKILL.md b/.claude/skills/release/SKILL.md
index 80510004..ba101bd3 100644
--- a/.claude/skills/release/SKILL.md
+++ b/.claude/skills/release/SKILL.md
@@ -25,4 +25,6 @@ Update the version string in ALL of these files:
Do NOT change version strings in `*.lock` files or `Cargo.lock`.
+After updating all version strings, run `cd cli && uv lock` to regenerate `cli/uv.lock` with the new version (let `uv` rewrite the lock file; do not edit it by hand).
+
After making changes, show a summary of all files modified and the old → new version.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 25e6f99d..01d5c33f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -133,13 +133,13 @@ jobs:
load: true
tags: |
${{ env.PLANO_DOCKER_IMAGE }}
- ${{ env.DOCKER_IMAGE }}:0.4.11
+ ${{ env.DOCKER_IMAGE }}:0.4.12
${{ env.DOCKER_IMAGE }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Save image as artifact
- run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.11 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
+ run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.12 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
- name: Upload image artifact
uses: actions/upload-artifact@v6
diff --git a/.gitignore b/.gitignore
index af706ea4..391c17fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,4 @@ apps/*/dist/
.cursor/
.agents
+docs/do/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 84001c45..22a18416 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
hooks:
- id: check-yaml
exclude: config/envoy.template*
+ args: [--allow-multiple-documents]
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: local
diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx
index 7952c68f..fcfe5f01 100644
--- a/apps/www/src/components/Hero.tsx
+++ b/apps/www/src/components/Hero.tsx
@@ -24,7 +24,7 @@ export function Hero() {
>
- v0.4.11
+ v0.4.12
—
diff --git a/build_filter_image.sh b/build_filter_image.sh
index 8e041894..15d3d10e 100644
--- a/build_filter_image.sh
+++ b/build_filter_image.sh
@@ -1 +1 @@
-docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.11
+docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.12
diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py
index b94eadc2..e69352e8 100644
--- a/cli/planoai/__init__.py
+++ b/cli/planoai/__init__.py
@@ -1,3 +1,3 @@
"""Plano CLI - Intelligent Prompt Gateway."""
-__version__ = "0.4.11"
+__version__ = "0.4.12"
diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py
index a4f9eb21..929b7657 100644
--- a/cli/planoai/config_generator.py
+++ b/cli/planoai/config_generator.py
@@ -3,18 +3,17 @@ import os
from planoai.utils import convert_legacy_listeners
from jinja2 import Environment, FileSystemLoader
import yaml
-from jsonschema import validate
+from jsonschema import validate, ValidationError
from urllib.parse import urlparse
from copy import deepcopy
from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
-
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
"azure_openai",
"ollama",
"qwen",
"amazon_bedrock",
- "arch",
+ "plano",
]
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
@@ -368,47 +367,52 @@ def validate_and_render_schema():
llms_with_endpoint.append(model_provider)
llms_with_endpoint_cluster_names.add(cluster_name)
- if len(model_usage_name_keys) > 0:
- routing_model_provider = config_yaml.get("routing", {}).get(
- "model_provider", None
+ overrides_config = config_yaml.get("overrides", {})
+ # Build lookup of model names (already prefix-stripped by config processing)
+ model_name_set = {mp.get("model") for mp in updated_model_providers}
+
+ # Auto-add arch-router provider if routing preferences exist and no provider matches the router model
+ router_model = overrides_config.get("llm_routing_model", "Arch-Router")
+ # Strip provider prefix for comparison since config processing strips prefixes from model names
+ router_model_id = (
+ router_model.split("/", 1)[1] if "/" in router_model else router_model
+ )
+ if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set:
+ updated_model_providers.append(
+ {
+ "name": "arch-router",
+ "provider_interface": "plano",
+ "model": router_model_id,
+ "internal": True,
+ }
)
- if (
- routing_model_provider
- and routing_model_provider not in model_provider_name_set
- ):
- raise Exception(
- f"Routing model_provider {routing_model_provider} is not defined in model_providers"
- )
- if (
- routing_model_provider is None
- and "arch-router" not in model_provider_name_set
- ):
- updated_model_providers.append(
- {
- "name": "arch-router",
- "provider_interface": "arch",
- "model": config_yaml.get("routing", {}).get("model", "Arch-Router"),
- "internal": True,
- }
- )
# Always add arch-function model provider if not already defined
if "arch-function" not in model_provider_name_set:
updated_model_providers.append(
{
"name": "arch-function",
- "provider_interface": "arch",
+ "provider_interface": "plano",
"model": "Arch-Function",
"internal": True,
}
)
- if "plano-orchestrator" not in model_provider_name_set:
+ # Auto-add plano-orchestrator provider if no provider matches the orchestrator model
+ orchestrator_model = overrides_config.get(
+ "agent_orchestration_model", "Plano-Orchestrator"
+ )
+ orchestrator_model_id = (
+ orchestrator_model.split("/", 1)[1]
+ if "/" in orchestrator_model
+ else orchestrator_model
+ )
+ if orchestrator_model_id not in model_name_set:
updated_model_providers.append(
{
- "name": "plano-orchestrator",
- "provider_interface": "arch",
- "model": "Plano-Orchestrator",
+ "name": "plano/orchestrator",
+ "provider_interface": "plano",
+ "model": orchestrator_model_id,
"internal": True,
}
)
@@ -513,11 +517,15 @@ def validate_prompt_config(plano_config_file, plano_config_schema_file):
try:
validate(config_yaml, config_schema_yaml)
- except Exception as e:
- print(
- f"Error validating plano_config file: {plano_config_file}, schema file: {plano_config_schema_file}, error: {e}"
+ except ValidationError as e:
+ path = (
+ " → ".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
)
- raise e
+ raise ValidationError(
+ f"{e.message}\n Location: {path}\n Value: {e.instance}"
+ ) from None
+ except Exception as e:
+ raise
if __name__ == "__main__":
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py
index 145fb640..9c330caa 100644
--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
-PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.11")
+PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.12")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
# Native mode constants
diff --git a/cli/planoai/native_runner.py b/cli/planoai/native_runner.py
index 0e39a1fd..ed44e8ad 100644
--- a/cli/planoai/native_runner.py
+++ b/cli/planoai/native_runner.py
@@ -420,9 +420,16 @@ def native_validate_config(plano_config_file):
with _temporary_env(overrides):
from planoai.config_generator import validate_and_render_schema
- # Suppress verbose print output from config_generator
- with contextlib.redirect_stdout(io.StringIO()):
- validate_and_render_schema()
+ # Suppress verbose print output from config_generator but capture errors
+ captured = io.StringIO()
+ try:
+ with contextlib.redirect_stdout(captured):
+ validate_and_render_schema()
+ except SystemExit:
+ # validate_and_render_schema calls exit(1) on failure after printing
+ # to stdout; raise an Exception carrying that captured output instead.
+ output = captured.getvalue().strip()
+ raise Exception(output) if output else Exception("Config validation failed")
def native_logs(debug=False, follow=False):
diff --git a/cli/pyproject.toml b/cli/pyproject.toml
index 3f9be272..25cc81a4 100644
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "planoai"
-version = "0.4.11"
+version = "0.4.12"
description = "Python-based CLI tool to manage Plano."
authors = [{name = "Katanemo Labs, Inc."}]
readme = "README.md"
diff --git a/cli/uv.lock b/cli/uv.lock
index 9d85bf85..dfca2484 100644
--- a/cli/uv.lock
+++ b/cli/uv.lock
@@ -337,7 +337,7 @@ wheels = [
[[package]]
name = "planoai"
-version = "0.4.9"
+version = "0.4.12"
source = { editable = "." }
dependencies = [
{ name = "click" },
diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml
index a780c3f1..c2dd5ed0 100644
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@@ -594,13 +594,13 @@ static_resources:
clusters:
- - name: arch
+ - name: plano
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
- cluster_name: arch
+ cluster_name: plano
endpoints:
- lb_endpoints:
- endpoint:
diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
index b65fdb17..e0a6eef1 100644
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@@ -181,7 +181,7 @@ properties:
provider_interface:
type: string
enum:
- - arch
+ - plano
- claude
- deepseek
- groq
@@ -228,7 +228,7 @@ properties:
provider_interface:
type: string
enum:
- - arch
+ - plano
- claude
- deepseek
- groq
@@ -279,6 +279,12 @@ properties:
upstream_tls_ca_path:
type: string
description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
+ llm_routing_model:
+ type: string
+ description: "Model name for the LLM router (e.g., 'Arch-Router'). Must match a model in model_providers."
+ agent_orchestration_model:
+ type: string
+ description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
system_prompt:
type: string
prompt_targets:
@@ -416,14 +422,6 @@ properties:
enum:
- llm
- prompt
- routing:
- type: object
- properties:
- llm_provider:
- type: string
- model:
- type: string
- additionalProperties: false
state_storage:
type: object
properties:
diff --git a/crates/brightstaff/src/handlers/agent_selector.rs b/crates/brightstaff/src/handlers/agent_selector.rs
index 33cf73ff..0c9b018e 100644
--- a/crates/brightstaff/src/handlers/agent_selector.rs
+++ b/crates/brightstaff/src/handlers/agent_selector.rs
@@ -178,6 +178,7 @@ mod tests {
Arc::new(OrchestratorService::new(
"http://localhost:8080".to_string(),
"test-model".to_string(),
+ "plano-orchestrator".to_string(),
))
}
diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs
index 8013ed0a..c3153d3d 100644
--- a/crates/brightstaff/src/handlers/integration_tests.rs
+++ b/crates/brightstaff/src/handlers/integration_tests.rs
@@ -23,6 +23,7 @@ mod tests {
Arc::new(OrchestratorService::new(
"http://localhost:8080".to_string(),
"test-model".to_string(),
+ "plano-orchestrator".to_string(),
))
}
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index c8e34002..96a66c60 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -11,9 +11,7 @@ use brightstaff::state::StateStorage;
use brightstaff::utils::tracing::init_tracer;
use bytes::Bytes;
use common::configuration::{Agent, Configuration, ListenerType};
-use common::consts::{
- CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
-};
+use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
use common::llm_providers::LlmProviders;
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
use hyper::body::Incoming;
@@ -36,6 +34,8 @@ pub mod router;
const BIND_ADDRESS: &str = "0.0.0.0:9091";
const DEFAULT_ROUTING_LLM_PROVIDER: &str = "arch-router";
const DEFAULT_ROUTING_MODEL_NAME: &str = "Arch-Router";
+const DEFAULT_ORCHESTRATOR_LLM_PROVIDER: &str = "plano-orchestrator";
+const DEFAULT_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
// Utility function to extract the context from the incoming request headers
fn extract_context_from_request(req: &Request) -> Context {
@@ -139,16 +139,21 @@ async fn main() -> Result<(), Box> {
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
let listener = TcpListener::bind(bind_address).await?;
- let routing_model_name: String = plano_config
- .routing
- .as_ref()
- .and_then(|r| r.model.clone())
- .unwrap_or_else(|| DEFAULT_ROUTING_MODEL_NAME.to_string());
+ let overrides = plano_config.overrides.clone().unwrap_or_default();
+
+ // Strip the provider prefix (e.g. "plano/") to get the model ID used in upstream requests
+ let routing_model_name: String = overrides
+ .llm_routing_model
+ .as_deref()
+ .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m))
+ .unwrap_or(DEFAULT_ROUTING_MODEL_NAME)
+ .to_string();
let routing_llm_provider = plano_config
- .routing
- .as_ref()
- .and_then(|r| r.model_provider.clone())
+ .model_providers
+ .iter()
+ .find(|p| p.model.as_deref() == Some(routing_model_name.as_str()))
+ .map(|p| p.name.clone())
.unwrap_or_else(|| DEFAULT_ROUTING_LLM_PROVIDER.to_string());
let router_service: Arc = Arc::new(RouterService::new(
@@ -158,9 +163,25 @@ async fn main() -> Result<(), Box> {
routing_llm_provider,
));
+ // Strip the provider prefix (e.g. "plano/") to get the model ID used in upstream requests
+ let orchestrator_model_name: String = overrides
+ .agent_orchestration_model
+ .as_deref()
+ .map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m))
+ .unwrap_or(DEFAULT_ORCHESTRATOR_MODEL_NAME)
+ .to_string();
+
+ let orchestrator_llm_provider: String = plano_config
+ .model_providers
+ .iter()
+ .find(|p| p.model.as_deref() == Some(orchestrator_model_name.as_str()))
+ .map(|p| p.name.clone())
+ .unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
+
let orchestrator_service: Arc = Arc::new(OrchestratorService::new(
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
- PLANO_ORCHESTRATOR_MODEL_NAME.to_string(),
+ orchestrator_model_name,
+ orchestrator_llm_provider,
));
let model_aliases = Arc::new(plano_config.model_aliases.clone());
diff --git a/crates/brightstaff/src/router/plano_orchestrator.rs b/crates/brightstaff/src/router/plano_orchestrator.rs
index cf2688b9..12140570 100644
--- a/crates/brightstaff/src/router/plano_orchestrator.rs
+++ b/crates/brightstaff/src/router/plano_orchestrator.rs
@@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference},
- consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
+ consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
};
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
use hyper::header;
@@ -19,6 +19,7 @@ pub struct OrchestratorService {
orchestrator_url: String,
client: reqwest::Client,
orchestrator_model: Arc,
+ orchestrator_provider_name: String,
}
#[derive(Debug, Error)]
@@ -36,7 +37,11 @@ pub enum OrchestrationError {
pub type Result = std::result::Result;
impl OrchestratorService {
- pub fn new(orchestrator_url: String, orchestration_model_name: String) -> Self {
+ pub fn new(
+ orchestrator_url: String,
+ orchestration_model_name: String,
+ orchestrator_provider_name: String,
+ ) -> Self {
// Empty agent orchestrations - will be provided via usage_preferences in requests
let agent_orchestrations: HashMap> = HashMap::new();
@@ -50,6 +55,7 @@ impl OrchestratorService {
orchestrator_url,
client: reqwest::Client::new(),
orchestrator_model,
+ orchestrator_provider_name,
}
}
@@ -75,12 +81,12 @@ impl OrchestratorService {
debug!(
model = %self.orchestrator_model.get_model_name(),
endpoint = %self.orchestrator_url,
- "sending request to arch-orchestrator"
+ "sending request to plano-orchestrator"
);
debug!(
body = %serde_json::to_string(&orchestrator_request).unwrap(),
- "arch orchestrator request"
+ "plano orchestrator request"
);
let mut orchestration_request_headers = header::HeaderMap::new();
@@ -91,7 +97,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
- header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
+ header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
);
// Inject OpenTelemetry trace context from current span
@@ -110,7 +116,7 @@ impl OrchestratorService {
orchestration_request_headers.insert(
header::HeaderName::from_static("model"),
- header::HeaderValue::from_static(PLANO_ORCHESTRATOR_MODEL_NAME),
+ header::HeaderValue::from_str(&self.orchestrator_provider_name).unwrap(),
);
let start_time = std::time::Instant::now();
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 3050eac0..30187dd8 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -7,12 +7,6 @@ use crate::api::open_ai::{
ChatCompletionTool, FunctionDefinition, FunctionParameter, FunctionParameters, ParameterType,
};
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Routing {
- pub model_provider: Option,
- pub model: Option,
-}
-
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelAlias {
pub target: String,
@@ -84,7 +78,6 @@ pub struct Configuration {
pub ratelimits: Option>,
pub tracing: Option,
pub mode: Option,
- pub routing: Option,
pub agents: Option>,
pub filters: Option>,
pub listeners: Vec,
@@ -96,6 +89,8 @@ pub struct Overrides {
pub prompt_target_intent_matching_threshold: Option,
pub optimize_context_window: Option,
pub use_agent_orchestrator: Option,
+ pub llm_routing_model: Option,
+ pub agent_orchestration_model: Option,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -219,8 +214,6 @@ pub struct EmbeddingProviver {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum LlmProviderType {
- #[serde(rename = "arch")]
- Arch,
#[serde(rename = "anthropic")]
Anthropic,
#[serde(rename = "deepseek")]
@@ -249,12 +242,13 @@ pub enum LlmProviderType {
Qwen,
#[serde(rename = "amazon_bedrock")]
AmazonBedrock,
+ #[serde(rename = "plano")]
+ Plano,
}
impl Display for LlmProviderType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
- LlmProviderType::Arch => write!(f, "arch"),
LlmProviderType::Anthropic => write!(f, "anthropic"),
LlmProviderType::Deepseek => write!(f, "deepseek"),
LlmProviderType::Groq => write!(f, "groq"),
@@ -269,6 +263,7 @@ impl Display for LlmProviderType {
LlmProviderType::Zhipu => write!(f, "zhipu"),
LlmProviderType::Qwen => write!(f, "qwen"),
LlmProviderType::AmazonBedrock => write!(f, "amazon_bedrock"),
+ LlmProviderType::Plano => write!(f, "plano"),
}
}
}
@@ -603,14 +598,14 @@ mod test {
},
LlmProvider {
name: "arch-router".to_string(),
- provider_interface: LlmProviderType::Arch,
+ provider_interface: LlmProviderType::Plano,
model: Some("Arch-Router".to_string()),
internal: Some(true),
..Default::default()
},
LlmProvider {
name: "plano-orchestrator".to_string(),
- provider_interface: LlmProviderType::Arch,
+ provider_interface: LlmProviderType::Plano,
model: Some("Plano-Orchestrator".to_string()),
internal: Some(true),
..Default::default()
diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs
index cafc8e80..dbd0bc41 100644
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -33,5 +33,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
-pub const PLANO_ORCHESTRATOR_MODEL_NAME: &str = "Plano-Orchestrator";
-pub const ARCH_FC_CLUSTER: &str = "arch";
+pub const PLANO_FC_CLUSTER: &str = "plano";
diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml
index a748e7a7..a1425d43 100644
--- a/crates/hermesllm/src/bin/provider_models.yaml
+++ b/crates/hermesllm/src/bin/provider_models.yaml
@@ -1,183 +1,16 @@
version: '1.0'
source: canonical-apis
providers:
- mistralai:
- - mistralai/mistral-medium-2505
- - mistralai/mistral-medium-2508
- - mistralai/mistral-medium-latest
- - mistralai/mistral-medium
- - mistralai/mistral-vibe-cli-with-tools
- - mistralai/open-mistral-nemo
- - mistralai/open-mistral-nemo-2407
- - mistralai/mistral-tiny-2407
- - mistralai/mistral-tiny-latest
- - mistralai/mistral-large-2411
- - mistralai/pixtral-large-2411
- - mistralai/pixtral-large-latest
- - mistralai/mistral-large-pixtral-2411
- - mistralai/codestral-2508
- - mistralai/codestral-latest
- - mistralai/devstral-small-2507
- - mistralai/devstral-medium-2507
- - mistralai/devstral-2512
- - mistralai/mistral-vibe-cli-latest
- - mistralai/devstral-medium-latest
- - mistralai/devstral-latest
- - mistralai/labs-devstral-small-2512
- - mistralai/devstral-small-latest
- - mistralai/mistral-small-2506
- - mistralai/mistral-small-latest
- - mistralai/labs-mistral-small-creative
- - mistralai/magistral-medium-2509
- - mistralai/magistral-medium-latest
- - mistralai/magistral-small-2509
- - mistralai/magistral-small-latest
- - mistralai/mistral-large-2512
- - mistralai/mistral-large-latest
- - mistralai/ministral-3b-2512
- - mistralai/ministral-3b-latest
- - mistralai/ministral-8b-2512
- - mistralai/ministral-8b-latest
- - mistralai/ministral-14b-2512
- - mistralai/ministral-14b-latest
- - mistralai/mistral-small-2501
- - mistralai/mistral-embed-2312
- - mistralai/mistral-embed
- - mistralai/codestral-embed
- - mistralai/codestral-embed-2505
- openai:
- - openai/gpt-4-0613
- - openai/gpt-4
- - openai/gpt-3.5-turbo
- - openai/gpt-5.2-codex
- - openai/gpt-3.5-turbo-instruct
- - openai/gpt-3.5-turbo-instruct-0914
- - openai/gpt-4-1106-preview
- - openai/gpt-3.5-turbo-1106
- - openai/gpt-4-0125-preview
- - openai/gpt-4-turbo-preview
- - openai/gpt-3.5-turbo-0125
- - openai/gpt-4-turbo
- - openai/gpt-4-turbo-2024-04-09
- - openai/gpt-4o
- - openai/gpt-4o-2024-05-13
- - openai/gpt-4o-mini-2024-07-18
- - openai/gpt-4o-mini
- - openai/gpt-4o-2024-08-06
- - openai/chatgpt-4o-latest
- - openai/o1-2024-12-17
- - openai/o1
- - openai/computer-use-preview
- - openai/o3-mini
- - openai/o3-mini-2025-01-31
- - openai/gpt-4o-2024-11-20
- - openai/computer-use-preview-2025-03-11
- - openai/gpt-4o-search-preview-2025-03-11
- - openai/gpt-4o-search-preview
- - openai/gpt-4o-mini-search-preview-2025-03-11
- - openai/gpt-4o-mini-search-preview
- - openai/o1-pro-2025-03-19
- - openai/o1-pro
- - openai/o3-2025-04-16
- - openai/o4-mini-2025-04-16
- - openai/o3
- - openai/o4-mini
- - openai/gpt-4.1-2025-04-14
- - openai/gpt-4.1
- - openai/gpt-4.1-mini-2025-04-14
- - openai/gpt-4.1-mini
- - openai/gpt-4.1-nano-2025-04-14
- - openai/gpt-4.1-nano
- - openai/o3-pro
- - openai/o3-pro-2025-06-10
- - openai/o4-mini-deep-research
- - openai/o3-deep-research
- - openai/o3-deep-research-2025-06-26
- - openai/o4-mini-deep-research-2025-06-26
- - openai/gpt-5-chat-latest
- - openai/gpt-5-2025-08-07
- - openai/gpt-5
- - openai/gpt-5-mini-2025-08-07
- - openai/gpt-5-mini
- - openai/gpt-5-nano-2025-08-07
- - openai/gpt-5-nano
- - openai/gpt-5-codex
- - openai/gpt-5-pro-2025-10-06
- - openai/gpt-5-pro
- - openai/gpt-5-search-api
- - openai/gpt-5-search-api-2025-10-14
- - openai/gpt-5.1-chat-latest
- - openai/gpt-5.1-2025-11-13
- - openai/gpt-5.1
- - openai/gpt-5.1-codex
- - openai/gpt-5.1-codex-mini
- - openai/gpt-5.1-codex-max
- - openai/gpt-5.2-2025-12-11
- - openai/gpt-5.2
- - openai/gpt-5.2-pro-2025-12-11
- - openai/gpt-5.2-pro
- - openai/gpt-5.2-chat-latest
- - openai/gpt-3.5-turbo-16k
- - openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P
deepseek:
- deepseek/deepseek-chat
- deepseek/deepseek-reasoner
- x-ai:
- - x-ai/grok-2-vision-1212
- - x-ai/grok-3
- - x-ai/grok-3-mini
- - x-ai/grok-4-0709
- - x-ai/grok-4-1-fast-non-reasoning
- - x-ai/grok-4-1-fast-reasoning
- - x-ai/grok-4-fast-non-reasoning
- - x-ai/grok-4-fast-reasoning
- - x-ai/grok-code-fast-1
- - x-ai/grok-imagine-image
- - x-ai/grok-imagine-video
- moonshotai:
- - moonshotai/kimi-k2-thinking
- - moonshotai/kimi-k2.5
- - moonshotai/moonshot-v1-128k-vision-preview
- - moonshotai/moonshot-v1-8k
- - moonshotai/kimi-k2-turbo-preview
- - moonshotai/moonshot-v1-128k
- - moonshotai/moonshot-v1-32k-vision-preview
- - moonshotai/kimi-k2-thinking-turbo
- - moonshotai/kimi-latest
- - moonshotai/moonshot-v1-32k
- - moonshotai/moonshot-v1-auto
- - moonshotai/kimi-k2-0711-preview
- - moonshotai/kimi-k2-0905-preview
- - moonshotai/moonshot-v1-8k-vision-preview
- anthropic:
- - anthropic/claude-opus-4-6
- - anthropic/claude-opus-4-5-20251101
- - anthropic/claude-opus-4-5
- - anthropic/claude-haiku-4-5-20251001
- - anthropic/claude-haiku-4-5
- - anthropic/claude-sonnet-4-5-20250929
- - anthropic/claude-sonnet-4-5
- - anthropic/claude-opus-4-1-20250805
- - anthropic/claude-opus-4-1
- - anthropic/claude-opus-4-20250514
- - anthropic/claude-opus-4
- - anthropic/claude-sonnet-4-20250514
- - anthropic/claude-sonnet-4
- - anthropic/claude-3-7-sonnet-20250219
- - anthropic/claude-3-7-sonnet
- - anthropic/claude-3-5-haiku-20241022
- - anthropic/claude-3-5-haiku
- - anthropic/claude-3-haiku-20240307
- - anthropic/claude-3-haiku
google:
- google/gemini-2.5-flash
- google/gemini-2.5-pro
- google/gemini-2.0-flash
- google/gemini-2.0-flash-001
- - google/gemini-2.0-flash-exp-image-generation
- google/gemini-2.0-flash-lite-001
- google/gemini-2.0-flash-lite
- - google/gemini-exp-1206
- google/gemini-2.5-flash-preview-tts
- google/gemini-2.5-pro-preview-tts
- google/gemma-3-1b-it
@@ -191,12 +24,15 @@ providers:
- google/gemini-pro-latest
- google/gemini-2.5-flash-lite
- google/gemini-2.5-flash-image
- - google/gemini-2.5-flash-preview-09-2025
- google/gemini-2.5-flash-lite-preview-09-2025
- google/gemini-3-pro-preview
- google/gemini-3-flash-preview
+ - google/gemini-3.1-pro-preview
+ - google/gemini-3.1-pro-preview-customtools
+ - google/gemini-3.1-flash-lite-preview
- google/gemini-3-pro-image-preview
- google/nano-banana-pro-preview
+ - google/gemini-3.1-flash-image-preview
- google/gemini-robotics-er-1.5-preview
- google/gemini-2.5-computer-use-preview-10-2025
- google/deep-research-pro-preview-12-2025
@@ -212,7 +48,37 @@ providers:
- amazon/amazon.nova-premier-v1:0
- amazon/amazon.nova-lite-v1:0
- amazon/amazon.nova-micro-v1:0
+ x-ai:
+ - x-ai/grok-3
+ - x-ai/grok-3-mini
+ - x-ai/grok-4-0709
+ - x-ai/grok-4-1-fast-non-reasoning
+ - x-ai/grok-4-1-fast-reasoning
+ - x-ai/grok-4-fast-non-reasoning
+ - x-ai/grok-4-fast-reasoning
+ - x-ai/grok-4.20-beta-0309-non-reasoning
+ - x-ai/grok-4.20-beta-0309-reasoning
+ - x-ai/grok-4.20-multi-agent-beta-0309
+ - x-ai/grok-code-fast-1
+ - x-ai/grok-imagine-image
+ - x-ai/grok-imagine-video
+ z-ai:
+ - z-ai/glm-4.5
+ - z-ai/glm-4.5-air
+ - z-ai/glm-4.6
+ - z-ai/glm-4.7
+ - z-ai/glm-5
qwen:
+ - qwen/qwen3-asr-flash-2026-02-10
+ - qwen/qwen3.5-flash-2026-02-23
+ - qwen/qwen3.5-flash
+ - qwen/qwen3.5-122b-a10b
+ - qwen/qwen3.5-35b-a3b
+ - qwen/qwen3.5-27b
+ - qwen/qwen3-coder-next
+ - qwen/qwen3.5-397b-a17b
+ - qwen/qwen3.5-plus-2026-02-15
+ - qwen/qwen3.5-plus
- qwen/qwen3-vl-flash-2026-01-22
- qwen/qwen3-max-2026-01-23
- qwen/qwen-plus-character
@@ -294,13 +160,161 @@ providers:
- qwen/qwen-max
- qwen/qwen-plus
- qwen/qwen-turbo
- z-ai:
- - z-ai/glm-4.5
- - z-ai/glm-4.5-air
- - z-ai/glm-4.6
- - z-ai/glm-4.7
- - z-ai/glm-5
+ mistralai:
+ - mistralai/mistral-medium-2505
+ - mistralai/mistral-medium-2508
+ - mistralai/mistral-medium-latest
+ - mistralai/mistral-medium
+ - mistralai/mistral-vibe-cli-with-tools
+ - mistralai/open-mistral-nemo
+ - mistralai/open-mistral-nemo-2407
+ - mistralai/mistral-tiny-2407
+ - mistralai/mistral-tiny-latest
+ - mistralai/codestral-2508
+ - mistralai/codestral-latest
+ - mistralai/devstral-2512
+ - mistralai/mistral-vibe-cli-latest
+ - mistralai/devstral-medium-latest
+ - mistralai/devstral-latest
+ - mistralai/mistral-small-2506
+ - mistralai/mistral-small-latest
+ - mistralai/labs-mistral-small-creative
+ - mistralai/magistral-medium-2509
+ - mistralai/magistral-medium-latest
+ - mistralai/magistral-small-2509
+ - mistralai/magistral-small-latest
+ - mistralai/mistral-large-2512
+ - mistralai/mistral-large-latest
+ - mistralai/ministral-3b-2512
+ - mistralai/ministral-3b-latest
+ - mistralai/ministral-8b-2512
+ - mistralai/ministral-8b-latest
+ - mistralai/ministral-14b-2512
+ - mistralai/ministral-14b-latest
+ - mistralai/mistral-large-2411
+ - mistralai/pixtral-large-2411
+ - mistralai/pixtral-large-latest
+ - mistralai/mistral-large-pixtral-2411
+ - mistralai/devstral-small-2507
+ - mistralai/devstral-medium-2507
+ - mistralai/labs-devstral-small-2512
+ - mistralai/devstral-small-latest
+ - mistralai/mistral-squarepoint-2602
+ - mistralai/mistral-embed-2312
+ - mistralai/mistral-embed
+ - mistralai/codestral-embed
+ - mistralai/codestral-embed-2505
+ moonshotai:
+ - moonshotai/kimi-k2.5
+ - moonshotai/kimi-k2-0905-preview
+ - moonshotai/moonshot-v1-32k
+ - moonshotai/moonshot-v1-128k
+ - moonshotai/kimi-k2-thinking-turbo
+ - moonshotai/moonshot-v1-8k-vision-preview
+ - moonshotai/kimi-k2-0711-preview
+ - moonshotai/moonshot-v1-auto
+ - moonshotai/kimi-k2-thinking
+ - moonshotai/moonshot-v1-128k-vision-preview
+ - moonshotai/kimi-k2-turbo-preview
+ - moonshotai/moonshot-v1-32k-vision-preview
+ - moonshotai/moonshot-v1-8k
+ anthropic:
+ - anthropic/claude-sonnet-4-6
+ - anthropic/claude-opus-4-6
+ - anthropic/claude-opus-4-5-20251101
+ - anthropic/claude-opus-4-5
+ - anthropic/claude-haiku-4-5-20251001
+ - anthropic/claude-haiku-4-5
+ - anthropic/claude-sonnet-4-5-20250929
+ - anthropic/claude-sonnet-4-5
+ - anthropic/claude-opus-4-1-20250805
+ - anthropic/claude-opus-4-1
+ - anthropic/claude-opus-4-20250514
+ - anthropic/claude-opus-4
+ - anthropic/claude-sonnet-4-20250514
+ - anthropic/claude-sonnet-4
+ - anthropic/claude-3-haiku-20240307
+ - anthropic/claude-3-haiku
+ openai:
+ - openai/gpt-4-0613
+ - openai/gpt-4
+ - openai/gpt-3.5-turbo
+ - openai/gpt-5.4
+ - openai/gpt-5.3-chat-latest
+ - openai/gpt-5.4-2026-03-05
+ - openai/gpt-5.4-pro
+ - openai/gpt-5.4-pro-2026-03-05
+ - openai/gpt-3.5-turbo-instruct
+ - openai/gpt-3.5-turbo-instruct-0914
+ - openai/gpt-4-1106-preview
+ - openai/gpt-3.5-turbo-1106
+ - openai/gpt-4-0125-preview
+ - openai/gpt-4-turbo-preview
+ - openai/gpt-3.5-turbo-0125
+ - openai/gpt-4-turbo
+ - openai/gpt-4-turbo-2024-04-09
+ - openai/gpt-4o
+ - openai/gpt-4o-2024-05-13
+ - openai/gpt-4o-mini-2024-07-18
+ - openai/gpt-4o-mini
+ - openai/gpt-4o-2024-08-06
+ - openai/o1-2024-12-17
+ - openai/o1
+ - openai/computer-use-preview
+ - openai/o3-mini
+ - openai/o3-mini-2025-01-31
+ - openai/gpt-4o-2024-11-20
+ - openai/computer-use-preview-2025-03-11
+ - openai/gpt-4o-mini-search-preview-2025-03-11
+ - openai/gpt-4o-mini-search-preview
+ - openai/o1-pro-2025-03-19
+ - openai/o1-pro
+ - openai/o3-2025-04-16
+ - openai/o4-mini-2025-04-16
+ - openai/o3
+ - openai/o4-mini
+ - openai/gpt-4.1-2025-04-14
+ - openai/gpt-4.1
+ - openai/gpt-4.1-mini-2025-04-14
+ - openai/gpt-4.1-mini
+ - openai/gpt-4.1-nano-2025-04-14
+ - openai/gpt-4.1-nano
+ - openai/o3-pro
+ - openai/o3-pro-2025-06-10
+ - openai/o4-mini-deep-research
+ - openai/o3-deep-research
+ - openai/o3-deep-research-2025-06-26
+ - openai/o4-mini-deep-research-2025-06-26
+ - openai/gpt-5-chat-latest
+ - openai/gpt-5-2025-08-07
+ - openai/gpt-5
+ - openai/gpt-5-mini-2025-08-07
+ - openai/gpt-5-mini
+ - openai/gpt-5-nano-2025-08-07
+ - openai/gpt-5-nano
+ - openai/gpt-5-codex
+ - openai/gpt-5-pro-2025-10-06
+ - openai/gpt-5-pro
+ - openai/gpt-5-search-api
+ - openai/gpt-5-search-api-2025-10-14
+ - openai/gpt-5.1-chat-latest
+ - openai/gpt-5.1-2025-11-13
+ - openai/gpt-5.1
+ - openai/gpt-5.1-codex
+ - openai/gpt-5.1-codex-mini
+ - openai/gpt-5.1-codex-max
+ - openai/gpt-5.2-2025-12-11
+ - openai/gpt-5.2
+ - openai/gpt-5.2-pro-2025-12-11
+ - openai/gpt-5.2-pro
+ - openai/gpt-5.2-chat-latest
+ - openai/gpt-5.2-codex
+ - openai/gpt-5.3-codex
+ - openai/gpt-4o-search-preview
+ - openai/gpt-4o-search-preview-2025-03-11
+ - openai/gpt-3.5-turbo-16k
+ - openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P
metadata:
total_providers: 10
- total_models: 289
- last_updated: 2026-02-13T22:44:30.413065+00:00
+ total_models: 303
+ last_updated: 2026-03-15T16:47:22.207197+00:00
diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs
index 997fc72a..3b9611e0 100644
--- a/crates/hermesllm/src/lib.rs
+++ b/crates/hermesllm/src/lib.rs
@@ -35,7 +35,7 @@ mod tests {
ProviderId::Mistral
);
assert_eq!(ProviderId::try_from("groq").unwrap(), ProviderId::Groq);
- assert_eq!(ProviderId::try_from("arch").unwrap(), ProviderId::Arch);
+ assert_eq!(ProviderId::try_from("plano").unwrap(), ProviderId::Plano);
// Test aliases
assert_eq!(ProviderId::try_from("google").unwrap(), ProviderId::Gemini);
diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs
index 11008711..9f5f42c9 100644
--- a/crates/hermesllm/src/providers/id.rs
+++ b/crates/hermesllm/src/providers/id.rs
@@ -34,7 +34,7 @@ pub enum ProviderId {
Gemini,
Anthropic,
GitHub,
- Arch,
+ Plano,
AzureOpenAI,
XAI,
TogetherAI,
@@ -58,7 +58,7 @@ impl TryFrom<&str> for ProviderId {
"google" => Ok(ProviderId::Gemini), // alias
"anthropic" => Ok(ProviderId::Anthropic),
"github" => Ok(ProviderId::GitHub),
- "arch" => Ok(ProviderId::Arch),
+ "plano" => Ok(ProviderId::Plano),
"azure_openai" => Ok(ProviderId::AzureOpenAI),
"xai" => Ok(ProviderId::XAI),
"together_ai" => Ok(ProviderId::TogetherAI),
@@ -135,7 +135,7 @@ impl ProviderId {
| ProviderId::Groq
| ProviderId::Mistral
| ProviderId::Deepseek
- | ProviderId::Arch
+ | ProviderId::Plano
| ProviderId::Gemini
| ProviderId::GitHub
| ProviderId::AzureOpenAI
@@ -153,7 +153,7 @@ impl ProviderId {
| ProviderId::Groq
| ProviderId::Mistral
| ProviderId::Deepseek
- | ProviderId::Arch
+ | ProviderId::Plano
| ProviderId::Gemini
| ProviderId::GitHub
| ProviderId::AzureOpenAI
@@ -219,7 +219,7 @@ impl Display for ProviderId {
ProviderId::Gemini => write!(f, "Gemini"),
ProviderId::Anthropic => write!(f, "Anthropic"),
ProviderId::GitHub => write!(f, "GitHub"),
- ProviderId::Arch => write!(f, "Arch"),
+ ProviderId::Plano => write!(f, "Plano"),
ProviderId::AzureOpenAI => write!(f, "azure_openai"),
ProviderId::XAI => write!(f, "xai"),
ProviderId::TogetherAI => write!(f, "together_ai"),
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 7a353bcb..f62631fa 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -873,7 +873,7 @@ impl HttpContext for StreamContext {
// ensure that the provider has an endpoint if the access key is missing else return a bad request
if self.llm_provider.as_ref().unwrap().endpoint.is_none()
&& self.llm_provider.as_ref().unwrap().provider_interface
- != LlmProviderType::Arch
+ != LlmProviderType::Plano
{
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
}
diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md
index 7886539d..239ba938 100644
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@@ -123,6 +123,42 @@ Each agent:
Both agents run as native local processes and communicate with Plano running natively on the host.
+## Running with local Plano-Orchestrator (via vLLM)
+
+By default, Plano uses a hosted Plano-Orchestrator endpoint. To self-host the orchestrator model locally using vLLM on a server with an NVIDIA GPU:
+
+1. Install vLLM and download the model:
+```bash
+pip install vllm
+```
+
+2. Start the vLLM server with the 4B model:
+```bash
+vllm serve katanemo/Plano-Orchestrator-4B \
+ --host 0.0.0.0 \
+ --port 8000 \
+ --tensor-parallel-size 1 \
+ --gpu-memory-utilization 0.3 \
+ --tokenizer katanemo/Plano-Orchestrator-4B \
+ --chat-template chat_template.jinja \
+ --served-model-name katanemo/Plano-Orchestrator-4B \
+ --enable-prefix-caching
+```
+
+3. Start the demo with the local orchestrator config:
+```bash
+./run_demo.sh --local-orchestrator
+```
+
+4. Test with curl:
+```bash
+curl -X POST http://localhost:8001/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}'
+```
+
+You should see Plano use your local orchestrator to route the request to the weather agent.
+
## Observability
This demo includes full OpenTelemetry (OTel) compatible distributed tracing to monitor and debug agent interactions:
diff --git a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml
new file mode 100644
index 00000000..1d3a0be8
--- /dev/null
+++ b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml
@@ -0,0 +1,66 @@
+version: v0.3.0
+
+overrides:
+  agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B  # must match a model_providers entry below
+
+agents:
+  - id: weather_agent
+    url: http://localhost:10510
+  - id: flight_agent
+    url: http://localhost:10520
+
+model_providers:
+  - model: plano/katanemo/Plano-Orchestrator-4B
+    base_url: http://localhost:8000  # local vLLM server (README: vllm serve ... --port 8000)
+
+  - model: openai/gpt-5.2
+    access_key: $OPENAI_API_KEY
+    default: true
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY  # smaller, faster, cheaper model for extracting entities like location
+
+listeners:
+  - type: agent
+    name: travel_booking_service
+    port: 8001
+    router: plano_orchestrator_v1
+    agents:
+      - id: weather_agent
+        description: |
+
+          WeatherAgent is a specialized AI assistant for real-time weather information and forecasts. It provides accurate weather data for any city worldwide using the Open-Meteo API, helping travelers plan their trips with up-to-date weather conditions.
+
+          Capabilities:
+            * Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed)
+            * Provides current temperature
+            * Provides multi-day forecasts
+            * Provides weather conditions
+            * Provides sunrise/sunset times
+            * Provides detailed weather information
+            * Understands conversation context to resolve location references from previous messages
+            * Handles weather-related questions including "What's the weather in [city]?", "What's the forecast for [city]?", "How's the weather in [city]?"
+            * When queries include both weather and other travel questions (e.g., flights, currency), this agent answers ONLY the weather part
+
+      - id: flight_agent
+        description: |
+
+          FlightAgent is an AI-powered tool specialized in providing live flight information between airports. It leverages the FlightAware AeroAPI to deliver real-time flight status, gate information, and delay updates.
+
+          Capabilities:
+            * Get live flight information between airports using FlightAware AeroAPI
+            * Shows real-time flight status
+            * Shows scheduled/estimated/actual departure and arrival times
+            * Shows gate and terminal information
+            * Shows delays
+            * Shows aircraft type
+            * Shows flight status
+            * Automatically resolves city names to airport codes (IATA/ICAO)
+            * Understands conversation context to infer origin/destination from follow-up questions
+            * Handles flight-related questions including "What flights go from [city] to [city]?", "Do flights go to [city]?", "Are there direct flights from [city]?"
+            * When queries include both flight and other travel questions (e.g., weather, currency), this agent answers ONLY the flight part
+
+tracing:
+  random_sampling: 100
+  span_attributes:
+    header_prefixes:
+      - x-acme-
diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh
index 643a0aa2..35166b85 100755
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@@ -31,8 +31,13 @@ start_demo() {
fi
# Step 4: Start Plano
- echo "Starting Plano with config.yaml..."
- planoai up config.yaml
+ PLANO_CONFIG="config.yaml"
+ if [ "$1" == "--local-orchestrator" ]; then
+ PLANO_CONFIG="config_local_orchestrator.yaml"
+ echo "Using local orchestrator config..."
+ fi
+ echo "Starting Plano with $PLANO_CONFIG..."
+ planoai up "$PLANO_CONFIG"
# Step 5: Start agents natively
echo "Starting agents..."
diff --git a/demos/llm_routing/model_routing_service/README.md b/demos/llm_routing/model_routing_service/README.md
index 85d56abf..72b672f3 100644
--- a/demos/llm_routing/model_routing_service/README.md
+++ b/demos/llm_routing/model_routing_service/README.md
@@ -1,6 +1,54 @@
# Model Routing Service Demo
-This demo shows how to use the `/routing/v1/*` endpoints to get routing decisions without proxying requests to an LLM. The endpoint accepts standard LLM request formats and returns which model Plano's router would select.
+Plano is an AI-native proxy and data plane for agentic apps — with built-in orchestration, safety, observability, and intelligent LLM routing.
+
+```
+┌───────────┐      ┌─────────────────────────────────┐      ┌──────────────┐
+│  Client   │ ───► │              Plano              │ ───► │  OpenAI      │
+│  (any     │      │                                 │      │  Anthropic   │
+│  language)│      │    Arch-Router (1.5B model)     │      │  Any Provider│
+└───────────┘      │  analyzes intent → picks model  │      └──────────────┘
+                   └─────────────────────────────────┘
+```
+
+- **One endpoint, many models** — apps call Plano using standard OpenAI/Anthropic APIs; Plano handles provider selection, keys, and failover
+- **Intelligent routing** — a lightweight 1.5B router model classifies user intent and picks the best model per request
+- **Platform governance** — centralize API keys, rate limits, guardrails, and observability without touching app code
+- **Runs anywhere** — single binary; self-host the router for full data privacy
+
+## How Routing Works
+
+The entire routing configuration is plain YAML — no code:
+
+```yaml
+model_providers:
+  - model: openai/gpt-4o-mini
+    default: true  # fallback for unmatched requests
+
+  - model: openai/gpt-4o
+    routing_preferences:
+      - name: complex_reasoning
+        description: complex reasoning tasks, multi-step analysis
+
+  - model: anthropic/claude-sonnet-4-20250514
+    routing_preferences:
+      - name: code_generation
+        description: generating new code, writing functions
+```
+
+When a request arrives, Plano sends the conversation and routing preferences to Arch-Router, which classifies the intent and returns the matching route:
+
+```
+1. Request arrives → "Write binary search in Python"
+2. Preferences serialized → [{"name":"code_generation", ...}, {"name":"complex_reasoning", ...}]
+3. Arch-Router classifies → {"route": "code_generation"}
+4. Route → Model lookup → code_generation → anthropic/claude-sonnet-4-20250514
+5. Request forwarded → Claude generates the response
+```
+
+No match? Arch-Router returns `other` → Plano falls back to the default model.
+
+The `/routing/v1/*` endpoints return the routing decision **without** forwarding to the LLM — useful for testing and validating routing behavior before going to production.
## Setup
@@ -55,6 +103,69 @@ Response:
The response tells you which model would handle this request and which route was matched, without actually making the LLM call.
+## Kubernetes Deployment (Self-hosted Arch-Router on GPU)
+
+To run Arch-Router in-cluster using vLLM instead of the default hosted endpoint:
+
+**0. Check your GPU node labels and taints**
+
+```bash
+kubectl get nodes --show-labels | grep -i gpu
+kubectl get node <gpu-node-name> -o jsonpath='{.spec.taints}'
+```
+
+GPU nodes commonly have a `nvidia.com/gpu:NoSchedule` taint — `vllm-deployment.yaml` includes a matching toleration. If you have multiple GPU node pools and need to pin to a specific one, uncomment and set the `nodeSelector` in `vllm-deployment.yaml` using the label for your cloud provider.
+
+**1. Deploy Arch-Router and Plano:**
+
+```bash
+
+# arch-router deployment
+kubectl apply -f vllm-deployment.yaml
+
+# plano deployment
+kubectl create secret generic plano-secrets \
+ --from-literal=OPENAI_API_KEY=$OPENAI_API_KEY \
+ --from-literal=ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY
+
+kubectl create configmap plano-config \
+ --from-file=plano_config.yaml=config_k8s.yaml \
+ --dry-run=client -o yaml | kubectl apply -f -
+
+kubectl apply -f plano-deployment.yaml
+```
+
+**2. Wait for both pods to be ready:**
+
+```bash
+# Arch-Router downloads the model (~1 min) then vLLM loads it (~2 min)
+kubectl get pods -l app=arch-router -w
+kubectl rollout status deployment/plano
+```
+
+**3. Test:**
+
+```bash
+kubectl port-forward svc/plano 12000:12000
+./demo.sh
+```
+
+To confirm requests are hitting your in-cluster Arch-Router (not just health checks):
+
+```bash
+kubectl logs -l app=arch-router -f --tail=0
+# Look for POST /v1/chat/completions entries
+```
+
+**Updating the config:**
+
+```bash
+kubectl create configmap plano-config \
+ --from-file=plano_config.yaml=config_k8s.yaml \
+ --dry-run=client -o yaml | kubectl apply -f -
+kubectl rollout restart deployment/plano
+```
+
## Demo Output
```
diff --git a/demos/llm_routing/model_routing_service/config_k8s.yaml b/demos/llm_routing/model_routing_service/config_k8s.yaml
new file mode 100644
index 00000000..bdf98bfa
--- /dev/null
+++ b/demos/llm_routing/model_routing_service/config_k8s.yaml
@@ -0,0 +1,33 @@
+version: v0.3.0
+
+overrides:
+  llm_routing_model: plano/Arch-Router  # must match a model_providers entry below
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+
+  - model: plano/Arch-Router
+    base_url: http://arch-router:10000  # in-cluster Service name and port from vllm-deployment.yaml
+
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    routing_preferences:
+      - name: complex_reasoning
+        description: complex reasoning tasks, multi-step analysis, or detailed explanations
+
+  - model: anthropic/claude-sonnet-4-20250514
+    access_key: $ANTHROPIC_API_KEY
+    routing_preferences:
+      - name: code_generation
+        description: generating new code, writing functions, or creating boilerplate
+
+tracing:
+  random_sampling: 100
diff --git a/demos/llm_routing/model_routing_service/plano-deployment.yaml b/demos/llm_routing/model_routing_service/plano-deployment.yaml
new file mode 100644
index 00000000..e093f404
--- /dev/null
+++ b/demos/llm_routing/model_routing_service/plano-deployment.yaml
@@ -0,0 +1,68 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: plano
+  labels:
+    app: plano
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: plano
+  template:
+    metadata:
+      labels:
+        app: plano
+    spec:
+      containers:
+        - name: plano
+          image: katanemo/plano:0.4.12
+          ports:
+            - containerPort: 12000  # LLM gateway (chat completions, model routing)
+              name: llm-gateway
+          envFrom:
+            - secretRef:
+                name: plano-secrets  # created with `kubectl create secret generic plano-secrets`
+          env:
+            - name: LOG_LEVEL
+              value: "info"
+          volumeMounts:
+            - name: plano-config
+              mountPath: /app/plano_config.yaml
+              subPath: plano_config.yaml  # mount the single ConfigMap key as a file, not a directory
+              readOnly: true
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: 12000
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 12000
+            initialDelaySeconds: 10
+            periodSeconds: 30
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "1000m"
+      volumes:
+        - name: plano-config
+          configMap:
+            name: plano-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: plano
+spec:
+  selector:
+    app: plano  # routes to pods labelled app=plano
+  ports:
+    - name: llm-gateway
+      port: 12000
+      targetPort: 12000
diff --git a/demos/llm_routing/model_routing_service/test.rest b/demos/llm_routing/model_routing_service/test.rest
new file mode 100644
index 00000000..b41d75f2
--- /dev/null
+++ b/demos/llm_routing/model_routing_service/test.rest
@@ -0,0 +1,36 @@
+### Code generation query (OpenAI format) — expects anthropic/claude-sonnet
+POST http://localhost:12000/routing/v1/chat/completions
+Content-Type: application/json
+
+{
+ "model": "gpt-4o-mini",
+ "messages": [{"role": "user", "content": "Write a Python function for binary search"}]
+}
+
+### Complex reasoning query (OpenAI format) — expects openai/gpt-4o
+POST http://localhost:12000/routing/v1/chat/completions
+Content-Type: application/json
+
+{
+ "model": "gpt-4o-mini",
+ "messages": [{"role": "user", "content": "Analyze the trade-offs between microservices and monolithic architecture"}]
+}
+
+### Simple query — no routing match, expects default model
+POST http://localhost:12000/routing/v1/chat/completions
+Content-Type: application/json
+
+{
+ "model": "gpt-4o-mini",
+ "messages": [{"role": "user", "content": "Hello"}]
+}
+
+### Code generation query (Anthropic format) — expects anthropic/claude-sonnet
+POST http://localhost:12000/routing/v1/messages
+Content-Type: application/json
+
+{
+ "model": "claude-sonnet-4-20250514",
+ "max_tokens": 1024,
+ "messages": [{"role": "user", "content": "Write a REST API in Go using Gin"}]
+}
diff --git a/demos/llm_routing/model_routing_service/vllm-deployment.yaml b/demos/llm_routing/model_routing_service/vllm-deployment.yaml
new file mode 100644
index 00000000..1debe15e
--- /dev/null
+++ b/demos/llm_routing/model_routing_service/vllm-deployment.yaml
@@ -0,0 +1,104 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: arch-router
+  labels:
+    app: arch-router
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: arch-router
+  template:
+    metadata:
+      labels:
+        app: arch-router
+    spec:
+      tolerations:
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+      # Optional: add a nodeSelector to pin to a specific GPU node pool.
+      # The nvidia.com/gpu resource request below is sufficient for most clusters.
+      # nodeSelector:
+      #   DigitalOcean: doks.digitalocean.com/gpu-model: l40s
+      #   GKE: cloud.google.com/gke-accelerator: nvidia-l4
+      #   EKS: eks.amazonaws.com/nodegroup: gpu-nodes
+      #   AKS: kubernetes.azure.com/agentpool: gpupool
+      initContainers:
+        - name: download-model  # fetches the GGUF weights into the shared volume before vLLM starts
+          image: python:3.11-slim
+          command:
+            - sh
+            - -c
+            - |
+              pip install huggingface_hub[cli] && \
+              python -c "from huggingface_hub import snapshot_download; snapshot_download('katanemo/Arch-Router-1.5B.gguf', local_dir='/models/Arch-Router-1.5B.gguf')"
+          volumeMounts:
+            - name: model-cache
+              mountPath: /models
+      containers:
+        - name: vllm
+          image: vllm/vllm-openai:latest
+          command:
+            - vllm
+            - serve
+            - /models/Arch-Router-1.5B.gguf/Arch-Router-1.5B-Q4_K_M.gguf
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "10000"
+            - "--load-format"
+            - "gguf"
+            - "--tokenizer"
+            - "katanemo/Arch-Router-1.5B"
+            - "--served-model-name"
+            - "Arch-Router"
+            - "--gpu-memory-utilization"
+            - "0.3"
+            - "--tensor-parallel-size"
+            - "1"
+            - "--enable-prefix-caching"
+          ports:
+            - name: http
+              containerPort: 10000
+              protocol: TCP
+          resources:
+            requests:
+              cpu: "1"
+              memory: "4Gi"
+              nvidia.com/gpu: "1"
+            limits:
+              cpu: "4"
+              memory: "8Gi"
+              nvidia.com/gpu: "1"
+          volumeMounts:
+            - name: model-cache
+              mountPath: /models
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 10000
+            initialDelaySeconds: 60
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 10000
+            initialDelaySeconds: 180  # leaves time for vLLM to load the model before liveness checks start
+            periodSeconds: 30
+      volumes:
+        - name: model-cache
+          emptyDir: {}  # pod-scoped scratch space shared by the init container and vllm
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: arch-router  # DNS name used by base_url http://arch-router:10000 in config_k8s.yaml
+spec:
+  selector:
+    app: arch-router
+  ports:
+    - name: http
+      port: 10000
+      targetPort: 10000
diff --git a/demos/llm_routing/openclaw_routing/config.yaml b/demos/llm_routing/openclaw_routing/config.yaml
index 3106b5dd..9690e747 100644
--- a/demos/llm_routing/openclaw_routing/config.yaml
+++ b/demos/llm_routing/openclaw_routing/config.yaml
@@ -1,8 +1,7 @@
version: v0.1.0
-routing:
- model: Arch-Router
- llm_provider: arch-router
+overrides:
+ llm_routing_model: Arch-Router
listeners:
egress_traffic:
diff --git a/demos/llm_routing/preference_based_routing/plano_config_local.yaml b/demos/llm_routing/preference_based_routing/plano_config_local.yaml
index dbd287dd..01adb097 100644
--- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml
+++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml
@@ -1,8 +1,7 @@
version: v0.3.0
-routing:
- model: Arch-Router
- llm_provider: arch-router
+overrides:
+ llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
listeners:
- type: model
@@ -11,8 +10,7 @@ listeners:
model_providers:
- - name: arch-router
- model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+ - model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
base_url: http://localhost:11434
- model: openai/gpt-4o-mini
diff --git a/docs/source/conf.py b/docs/source/conf.py
index ec476136..e554329f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
project = "Plano Docs"
copyright = "2025, Katanemo Labs, Inc"
author = "Katanemo Labs, Inc"
-release = " v0.4.11"
+release = " v0.4.12"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 279fde2d..9d51d1c4 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
.. code-block:: console
- $ uv tool install planoai==0.4.11
+ $ uv tool install planoai==0.4.12
**Option 2: Install with pip (Traditional)**
@@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
- $ pip install planoai==0.4.11
+ $ pip install planoai==0.4.12
.. _llm_routing_quickstart:
diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst
index 41c51b4a..7c4ad685 100644
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@@ -253,13 +253,11 @@ Using Ollama (recommended for local development)
.. code-block:: yaml
- routing:
- model: Arch-Router
- llm_provider: arch-router
+ overrides:
+ llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
model_providers:
- - name: arch-router
- model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+ - model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
base_url: http://localhost:11434
- model: openai/gpt-5.2
@@ -324,13 +322,11 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
.. code-block:: yaml
- routing:
- model: Arch-Router
- llm_provider: arch-router
+ overrides:
+ llm_routing_model: plano/Arch-Router
model_providers:
- - name: arch-router
- model: Arch-Router
+ - model: plano/Arch-Router
base_url: http://<your-server-ip>:10000
- model: openai/gpt-5.2
@@ -351,6 +347,35 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
curl http://localhost:10000/v1/models
+Using vLLM on Kubernetes (GPU nodes)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For teams running Kubernetes, Arch-Router and Plano can be deployed as in-cluster services.
+The ``demos/llm_routing/model_routing_service/`` directory includes ready-to-use manifests:
+
+- ``vllm-deployment.yaml`` — Arch-Router served by vLLM, with an init container to download
+ the model from HuggingFace
+- ``plano-deployment.yaml`` — Plano proxy configured to use the in-cluster Arch-Router
+- ``config_k8s.yaml`` — Plano config with ``llm_routing_model`` pointing at
+ ``http://arch-router:10000`` instead of the default hosted endpoint
+
+Key things to know before deploying:
+
+- GPU nodes commonly have a ``nvidia.com/gpu:NoSchedule`` taint — the ``vllm-deployment.yaml``
+ includes a matching toleration. The ``nvidia.com/gpu: "1"`` resource request is sufficient
+ for scheduling in most clusters; a ``nodeSelector`` is optional and commented out in the
+ manifest for cases where you need to pin to a specific GPU node pool.
+- Model download takes ~1 minute; vLLM loads the model in ~1-2 minutes after that. The
+ ``livenessProbe`` has a 180-second ``initialDelaySeconds`` to avoid premature restarts.
+- The Plano config ConfigMap must use ``--from-file=plano_config.yaml=config_k8s.yaml`` with
+ ``subPath`` in the Deployment — omitting ``subPath`` causes Kubernetes to mount a directory
+ instead of a file.
+
+For the canonical Plano Kubernetes deployment (ConfigMap, Secrets, Deployment YAML), see
+:ref:`deployment`. For full step-by-step commands specific to this demo, see the
+`demo README <https://github.com/katanemo/plano/tree/main/demos/llm_routing/model_routing_service>`_.
+
+
Combining Routing Methods
-------------------------
diff --git a/docs/source/guides/orchestration.rst b/docs/source/guides/orchestration.rst
index 3170b65f..1a153e83 100644
--- a/docs/source/guides/orchestration.rst
+++ b/docs/source/guides/orchestration.rst
@@ -335,6 +335,90 @@ Combine RAG agents for documentation lookup with specialized troubleshooting age
- id: troubleshoot_agent
description: Diagnoses and resolves technical issues step by step
+Self-hosting Plano-Orchestrator
+-------------------------------
+
+By default, Plano uses a hosted Plano-Orchestrator endpoint. To self-host the orchestrator model, you can serve it using **vLLM** on a server with an NVIDIA GPU.
+
+.. note::
+ vLLM requires a Linux server with an NVIDIA GPU (CUDA). For local development on macOS, a GGUF version for Ollama is coming soon.
+
+The following model variants are available on HuggingFace:
+
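+* `Plano-Orchestrator-4B <https://huggingface.co/katanemo/Plano-Orchestrator-4B>`_ — lighter model, suitable for development and testing
+* `Plano-Orchestrator-4B-FP8 <https://huggingface.co/katanemo/Plano-Orchestrator-4B-FP8>`_ — FP8 quantized 4B model, lower memory usage
+* `Plano-Orchestrator-30B-A3B <https://huggingface.co/katanemo/Plano-Orchestrator-30B-A3B>`_ — full-size model for production
+* `Plano-Orchestrator-30B-A3B-FP8 <https://huggingface.co/katanemo/Plano-Orchestrator-30B-A3B-FP8>`_ — FP8 quantized 30B model, recommended for production deployments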
+
+Using vLLM
+~~~~~~~~~~
+
+1. **Install vLLM**
+
+ .. code-block:: bash
+
+ pip install vllm
+
+2. **Download the model and chat template**
+
+ .. code-block:: bash
+
+ pip install huggingface_hub
+ huggingface-cli download katanemo/Plano-Orchestrator-4B
+
+3. **Start the vLLM server**
+
+ For the 4B model (development):
+
+ .. code-block:: bash
+
+ vllm serve katanemo/Plano-Orchestrator-4B \
+ --host 0.0.0.0 \
+ --port 8000 \
+ --tensor-parallel-size 1 \
+ --gpu-memory-utilization 0.3 \
+ --tokenizer katanemo/Plano-Orchestrator-4B \
+ --chat-template chat_template.jinja \
+ --served-model-name katanemo/Plano-Orchestrator-4B \
+ --enable-prefix-caching
+
+ For the 30B-A3B-FP8 model (production):
+
+ .. code-block:: bash
+
+ vllm serve katanemo/Plano-Orchestrator-30B-A3B-FP8 \
+ --host 0.0.0.0 \
+ --port 8000 \
+ --tensor-parallel-size 1 \
+ --gpu-memory-utilization 0.9 \
+ --tokenizer katanemo/Plano-Orchestrator-30B-A3B-FP8 \
+ --chat-template chat_template.jinja \
+ --max-model-len 32768 \
+ --served-model-name katanemo/Plano-Orchestrator-30B-A3B-FP8 \
+ --enable-prefix-caching
+
+4. **Configure Plano to use the local orchestrator**
+
+ Use the model name matching your ``--served-model-name``:
+
+ .. code-block:: yaml
+
+ overrides:
+ agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B
+
+ model_providers:
+ - model: katanemo/Plano-Orchestrator-4B
+ provider_interface: plano
+ base_url: http://<your-server-ip>:8000
+
+5. **Verify the server is running**
+
+ .. code-block:: bash
+
+ curl http://localhost:8000/health
+ curl http://localhost:8000/v1/models
+
+
Next Steps
----------
diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst
index 7b8b0554..2689384e 100644
--- a/docs/source/resources/deployment.rst
+++ b/docs/source/resources/deployment.rst
@@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
# docker-compose.yml
services:
plano:
- image: katanemo/plano:0.4.11
+ image: katanemo/plano:0.4.12
container_name: plano
ports:
- "10000:10000" # ingress (client -> plano)
@@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``:
spec:
containers:
- name: plano
- image: katanemo/plano:0.4.11
+ image: katanemo/plano:0.4.12
ports:
- containerPort: 12000 # LLM gateway (chat completions, model routing)
name: llm-gateway
diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
index 9717b53a..64ee1f91 100644
--- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
@@ -107,11 +107,11 @@ model_providers:
- internal: true
model: Arch-Function
name: arch-function
- provider_interface: arch
+ provider_interface: plano
- internal: true
model: Plano-Orchestrator
- name: plano-orchestrator
- provider_interface: arch
+ name: plano/orchestrator
+ provider_interface: plano
prompt_targets:
- description: Get current weather at a location.
endpoint: