feat(skills): add Agent Skills support with orchestrator-driven activation

This commit is contained in:
Spherrrical 2026-05-13 15:44:16 -07:00
parent 5a4487fc6e
commit 7f5bf641bb
24 changed files with 2777 additions and 97 deletions

View file

@ -132,11 +132,15 @@ impl AgentSelector {
.determine_orchestration(messages, Some(usage_preferences), request_id)
.await
{
Ok(Some(routes)) => {
debug!(count = routes.len(), "determined agents via orchestration");
Ok(Some(selection)) => {
debug!(
count = selection.routes.len(),
skill_count = selection.skills.len(),
"determined agents via orchestration"
);
let mut selected_agents = Vec::new();
for (route_name, agent_name) in routes {
for (route_name, agent_name) in selection.routes {
debug!(route = %route_name, agent = %agent_name, "processing route");
let selected_agent = agents
.iter()

View file

@ -37,7 +37,7 @@ use crate::tracing::{
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
plano as tracing_plano, set_service_name,
};
use model_selection::router_chat_get_upstream_model;
use model_selection::{inject_activated_skills_into_request, router_chat_get_upstream_model};
const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";
@ -282,26 +282,16 @@ async fn llm_chat_inner(
Err(response) => return Ok(response),
};
// Serialize request for upstream BEFORE router consumes it
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
// --- Phase 3: Route the request (or use pinned model from session cache) ---
let resolved_model = if let Some(cached_model) = pinned_model {
// Route the request (or use pinned model from session cache) BEFORE
// serializing for upstream — skill body injection happens here, so the
// upstream bytes must be produced after routing returns.
let (resolved_model, activated_skills) = if let Some(cached_model) = pinned_model {
info!(
session_id = %session_id.as_deref().unwrap_or(""),
model = %cached_model,
"using pinned routing decision from cache"
);
cached_model
(cached_model, Vec::new())
} else {
let routing_span = info_span!(
"routing",
@ -313,11 +303,16 @@ async fn llm_chat_inner(
route.selected_model = tracing::field::Empty,
routing.determination_ms = tracing::field::Empty,
);
// The router consumes the request (it converts it to OpenAI format
// internally to extract conversation messages). Clone so we can
// still mutate the original below when Plano-Orchestrator activates
// any Agent Skills.
let request_for_routing = client_request.clone();
let routing_result = match async {
set_service_name(operation_component::ROUTING);
router_chat_get_upstream_model(
Arc::clone(&state.orchestrator_service),
client_request,
request_for_routing,
&request_path,
&request_id,
inline_routing_preferences,
@ -335,8 +330,11 @@ async fn llm_chat_inner(
}
};
let (router_selected_model, route_name) =
(routing_result.model_name, routing_result.route_name);
let (router_selected_model, route_name, activated) = (
routing_result.model_name,
routing_result.route_name,
routing_result.activated_skills,
);
let model = if router_selected_model != "none" {
router_selected_model
} else {
@ -362,10 +360,34 @@ async fn llm_chat_inner(
.await;
}
model
(model, activated)
};
tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str());
// If Plano-Orchestrator activated any Agent Skills for this route, inject
// their SKILL.md bodies into the system prompt before we hand the bytes
// off to the upstream provider.
if !activated_skills.is_empty() {
info!(
count = activated_skills.len(),
skills = ?activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>(),
"injecting activated Agent Skills into upstream system prompt"
);
inject_activated_skills_into_request(&mut client_request, &activated_skills);
}
// Serialize request for upstream AFTER potential skill injection.
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
// --- Phase 4: Forward to upstream and stream back ---
send_upstream(
&state.http_client,

View file

@ -1,5 +1,9 @@
use common::configuration::TopLevelRoutingPreference;
use common::configuration::{SkillRef, TopLevelRoutingPreference};
use common::skills_runtime::augment_system_prompt_with_skills;
use hermesllm::apis::openai::{Message, MessageContent, Role};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::providers::request::ProviderRequest;
use hermesllm::transforms::lib::ExtractText;
use hermesllm::ProviderRequestType;
use hyper::StatusCode;
use std::sync::Arc;
@ -29,6 +33,10 @@ pub struct RoutingResult {
/// Full ranked list — use subsequent entries as fallbacks on 429/5xx.
pub models: Vec<String>,
pub route_name: Option<String>,
/// Agent Skills activated by Plano-Orchestrator for this request.
/// Their `body` field (the SKILL.md content) is prepended to the
/// upstream system prompt by the caller in `send_upstream`.
pub activated_skills: Vec<SkillRef>,
}
pub struct RoutingError {
@ -128,8 +136,8 @@ pub async fn router_chat_get_upstream_model(
match routing_result {
Ok(route) => match route {
Some((route_name, ranked_models)) => {
let model_name = ranked_models.first().cloned().unwrap_or_default();
Some(decision) => {
let model_name = decision.models.first().cloned().unwrap_or_default();
current_span.record("route.selected_model", model_name.as_str());
bs_metrics::record_router_decision(
route_label,
@ -139,8 +147,9 @@ pub async fn router_chat_get_upstream_model(
);
Ok(RoutingResult {
model_name,
models: ranked_models,
route_name: Some(route_name),
models: decision.models,
route_name: Some(decision.route_name),
activated_skills: decision.activated_skills,
})
}
None => {
@ -159,6 +168,7 @@ pub async fn router_chat_get_upstream_model(
model_name: "none".to_string(),
models: vec!["none".to_string()],
route_name: None,
activated_skills: Vec::new(),
})
}
},
@ -172,3 +182,61 @@ pub async fn router_chat_get_upstream_model(
}
}
}
/// Prepend the bodies of `activated_skills` to the system prompt of the
/// upstream request so the chosen LLM has access to each skill's instructions.
/// Works across every provider variant by going through the OpenAI message
/// shape (`get_messages`/`set_messages`).
///
/// When there is already a leading system message we augment it in place;
/// otherwise a new system message is inserted at position 0. No-op when
/// `activated_skills` is empty.
pub fn inject_activated_skills_into_request(
client_request: &mut ProviderRequestType,
activated_skills: &[SkillRef],
) {
if activated_skills.is_empty() {
return;
}
let skill_refs: Vec<&SkillRef> = activated_skills.iter().collect();
let mut messages = client_request.get_messages();
let (system_idx, base_text) = match messages.iter().position(|m| m.role == Role::System) {
Some(idx) => {
let text = messages[idx]
.content
.as_ref()
.map(|c| c.extract_text())
.unwrap_or_default();
(Some(idx), Some(text))
}
None => (None, None),
};
let augmented = augment_system_prompt_with_skills(base_text, &skill_refs);
let Some(augmented_text) = augmented else {
return;
};
match system_idx {
Some(idx) => {
messages[idx].content = Some(MessageContent::Text(augmented_text));
}
None => {
messages.insert(
0,
Message {
role: Role::System,
content: Some(MessageContent::Text(augmented_text)),
name: None,
tool_calls: None,
tool_call_id: None,
},
);
}
}
client_request.set_messages(&messages);
}

View file

@ -314,11 +314,12 @@ async fn init_app_state(
.orchestrator_model_context_length
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
let orchestrator_service = Arc::new(OrchestratorService::with_routing_and_skills(
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
orchestrator_model_name,
orchestrator_llm_provider,
config.routing_preferences.clone(),
config.skills.clone(),
metrics_service,
session_ttl_seconds,
session_cache,

View file

@ -1,7 +1,9 @@
use std::{borrow::Cow, collections::HashMap, sync::Arc, time::Duration};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference, TopLevelRoutingPreference},
configuration::{
AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference,
},
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
};
use hermesllm::apis::openai::Message;
@ -13,7 +15,7 @@ use tracing::{debug, info};
use super::http::{self, post_and_extract_content};
use super::model_metrics::ModelMetricsService;
use super::orchestrator_model::OrchestratorModel;
use super::orchestrator_model::{OrchestratorModel, OrchestratorSelection};
use crate::metrics as bs_metrics;
use crate::metrics::labels as metric_labels;
@ -30,12 +32,27 @@ pub struct OrchestratorService {
orchestrator_model: Arc<dyn OrchestratorModel>,
orchestrator_provider_name: String,
top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
/// Agent Skills catalog (deduplicated by name) attached to any
/// `routing_preferences[].skills` list. Empty when no route has skills.
skills_catalog: Vec<SkillRef>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_cache: Option<Arc<dyn SessionCache>>,
session_ttl: Duration,
tenant_header: Option<String>,
}
/// Result of `determine_route`: which route was picked, the ranked candidate
/// models for that route, and the Agent Skill bodies the orchestrator chose
/// to activate alongside it. Skills are resolved against
/// `routing_preferences[<route>].skills`, so unknown / cross-route names are
/// silently dropped.
#[derive(Debug, Clone, Default)]
pub struct RouteDecision {
pub route_name: String,
pub models: Vec<String>,
pub activated_skills: Vec<SkillRef>,
}
#[derive(Debug, Error)]
pub enum OrchestrationError {
#[error(transparent)]
@ -66,6 +83,7 @@ impl OrchestratorService {
orchestrator_model,
orchestrator_provider_name,
top_level_preferences: HashMap::new(),
skills_catalog: Vec::new(),
metrics_service: None,
session_cache: None,
session_ttl: Duration::from_secs(DEFAULT_SESSION_TTL_SECONDS),
@ -84,14 +102,53 @@ impl OrchestratorService {
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
Self::with_routing_and_skills(
orchestrator_url,
orchestration_model_name,
orchestrator_provider_name,
top_level_prefs,
None,
metrics_service,
session_ttl_seconds,
session_cache,
tenant_header,
max_token_length,
)
}
/// Like `with_routing`, but also seeds the orchestrator with a catalog of
/// Agent Skills referenced by `routing_preferences[].skills`. The
/// orchestrator gets a `<skills>` block in its system prompt and may
/// select zero or more skills alongside the picked route; this enables
/// the LLM handler to inject the chosen SKILL.md bodies into the
/// upstream request.
#[allow(clippy::too_many_arguments)]
pub fn with_routing_and_skills(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
skills_catalog: Option<Vec<SkillRef>>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_ttl_seconds: Option<u64>,
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
.map_or_else(HashMap::new, |prefs| {
prefs.into_iter().map(|p| (p.name.clone(), p)).collect()
});
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
let skills_catalog = build_skills_catalog_for_routes(
skills_catalog.as_deref().unwrap_or(&[]),
&top_level_preferences,
);
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::with_skills(
HashMap::new(),
skills_catalog.clone(),
orchestration_model_name,
max_token_length,
));
@ -105,6 +162,7 @@ impl OrchestratorService {
orchestrator_model,
orchestrator_provider_name,
top_level_preferences,
skills_catalog,
metrics_service,
session_cache: Some(session_cache),
session_ttl,
@ -170,7 +228,7 @@ impl OrchestratorService {
messages: &[Message],
inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
request_id: &str,
) -> Result<Option<(String, Vec<String>)>> {
) -> Result<Option<RouteDecision>> {
if messages.is_empty() {
return Ok(None);
}
@ -206,9 +264,13 @@ impl OrchestratorService {
)
.await?;
let result = if let Some(ref routes) = orchestration_result {
if routes.len() > 1 {
let all_routes: Vec<&str> = routes.iter().map(|(name, _)| name.as_str()).collect();
let result = if let Some(ref selection) = orchestration_result {
if selection.routes.len() > 1 {
let all_routes: Vec<&str> = selection
.routes
.iter()
.map(|(name, _)| name.as_str())
.collect();
info!(
routes = ?all_routes,
using = %all_routes.first().unwrap_or(&"none"),
@ -216,7 +278,7 @@ impl OrchestratorService {
);
}
if let Some((route_name, _)) = routes.first() {
if let Some((route_name, _)) = selection.routes.first() {
let top_pref = inline_top_map
.as_ref()
.and_then(|m| m.get(route_name))
@ -227,7 +289,16 @@ impl OrchestratorService {
Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await,
None => pref.models.clone(),
};
Some((route_name.clone(), ranked))
let activated_skills = resolve_activated_skills(
&self.skills_catalog,
pref.skills.as_deref().unwrap_or(&[]),
&selection.skills,
);
Some(RouteDecision {
route_name: route_name.clone(),
models: ranked,
activated_skills,
})
} else {
None
}
@ -239,7 +310,7 @@ impl OrchestratorService {
};
info!(
selected_model = ?result,
selected_route = ?result.as_ref().map(|r| (&r.route_name, r.models.first(), r.activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>())),
"plano-orchestrator determined route"
);
@ -253,7 +324,7 @@ impl OrchestratorService {
messages: &[Message],
usage_preferences: Option<Vec<AgentUsagePreference>>,
request_id: Option<String>,
) -> Result<Option<Vec<(String, String)>>> {
) -> Result<Option<OrchestratorSelection>> {
if messages.is_empty() {
return Ok(None);
}
@ -328,6 +399,61 @@ impl OrchestratorService {
}
}
/// Build the orchestrator-visible skills catalog (deduplicated by name) from
/// the union of every skill name referenced under
/// `routing_preferences[].skills`. Skills that are not referenced by any
/// route are excluded — they would just clutter the prompt with no way for
/// the orchestrator to attach them to a route.
fn build_skills_catalog_for_routes(
catalog: &[SkillRef],
routes: &HashMap<String, TopLevelRoutingPreference>,
) -> Vec<SkillRef> {
let mut referenced: std::collections::HashSet<&str> = std::collections::HashSet::new();
for route in routes.values() {
if let Some(names) = route.skills.as_ref() {
for name in names {
referenced.insert(name.as_str());
}
}
}
let mut out: Vec<SkillRef> = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
for skill in catalog {
if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) {
out.push(skill.clone());
}
}
out
}
/// Filter the orchestrator-selected skill names down to the SKILL.md bodies
/// allowed for the chosen route, preserving the order the orchestrator
/// returned. Unknown names (either not in the catalog or not allowed by the
/// route) are silently dropped; the orchestrator can hallucinate.
fn resolve_activated_skills(
catalog: &[SkillRef],
route_allowlist: &[String],
selected: &[String],
) -> Vec<SkillRef> {
let allowed: std::collections::HashSet<&str> =
route_allowlist.iter().map(String::as_str).collect();
let mut out: Vec<SkillRef> = Vec::with_capacity(selected.len());
let mut taken: std::collections::HashSet<&str> = std::collections::HashSet::new();
for name in selected {
if !allowed.contains(name.as_str()) {
continue;
}
if !taken.insert(name.as_str()) {
continue;
}
if let Some(skill) = catalog.iter().find(|s| &s.name == name) {
out.push(skill.clone());
}
}
out
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -10,20 +10,37 @@ pub enum OrchestratorModelError {
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
/// The result of running Plano-Orchestrator over a conversation: zero or more
/// selected routes (each mapped to its upstream model name) plus zero or more
/// selected Agent Skills. Skills are filtered down by the consumer to the
/// catalog defined under `routing_preferences[].skills` for the chosen route.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OrchestratorSelection {
pub routes: Vec<(String, String)>,
pub skills: Vec<String>,
}
impl OrchestratorSelection {
pub fn is_empty(&self) -> bool {
self.routes.is_empty() && self.skills.is_empty()
}
}
/// OrchestratorModel trait for handling orchestration requests.
/// Returns multiple routes as the model output format is:
/// {"route": ["route_name_1", "route_name_2", ...]}
/// Returns multiple routes and skills as the model output format is:
/// {"route": ["route_name_1", ...], "skills": ["skill_name_1", ...]}
pub trait OrchestratorModel: Send + Sync {
fn generate_request(
&self,
messages: &[Message],
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> ChatCompletionsRequest;
/// Returns a vector of (route_name, model_name) tuples for all matched routes.
/// Parses the orchestrator's raw model output into selected routes (each
/// mapped to a model) and selected skill names.
fn parse_response(
&self,
content: &str,
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> Result<Option<Vec<(String, String)>>>;
) -> Result<Option<OrchestratorSelection>>;
fn get_model_name(&self) -> String;
}

View file

@ -1,12 +1,12 @@
use std::collections::HashMap;
use common::configuration::{AgentUsagePreference, OrchestrationPreference};
use common::configuration::{AgentUsagePreference, OrchestrationPreference, SkillRef};
use hermesllm::apis::openai::{ChatCompletionsRequest, Message, MessageContent, Role};
use hermesllm::transforms::lib::ExtractText;
use serde::{ser::Serialize as SerializeTrait, Deserialize, Serialize};
use tracing::{debug, warn};
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError, OrchestratorSelection};
pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model
@ -138,10 +138,47 @@ Return your answer strictly in JSON as follows:
If no routes are needed, return an empty list for `route`.
"#;
/// System prompt used when one or more Agent Skills are attached to candidate
/// routes. Adds a `<skills>` block alongside `<routes>` and asks the model to
/// also pick zero or more skills that should be loaded into the downstream
/// LLM's system prompt.
pub const ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS: &str = r#"
You are a helpful assistant that selects the most suitable routes and Agent Skills based on user intent.
You are provided with a list of available routes enclosed within <routes></routes> XML tags:
<routes>
{routes}
</routes>
You are provided with a list of available Agent Skills enclosed within <skills></skills> XML tags:
<skills>
{skills}
</skills>
You are also given the conversation context enclosed within <conversation></conversation> XML tags:
<conversation>
{conversation}
</conversation>
## Instructions
1. Analyze the latest user intent from the conversation.
2. Compare it against the available routes to find which routes can help fulfill the request.
3. Independently compare it against the available skills; pick the skills whose descriptions match what the user is trying to do. Skills can be combined with any route. Activating a skill loads detailed instructions into the next response's system prompt.
4. Respond only with exact names from <routes> and <skills>.
5. If no routes or skills can help, return empty lists.
## Response Format
Return your answer strictly in JSON as follows:
{{"route": ["route_name_1", "..."], "skills": ["skill_name_1", "..."]}}
Use empty lists for `route` and/or `skills` when nothing applies.
"#;
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
pub struct OrchestratorModelV1 {
agent_orchestration_json_str: String,
agent_orchestration_to_model_map: HashMap<String, String>,
/// Pre-rendered `<skills>` block (one JSON entry per skill, name +
/// description). Empty when no skills are attached to any route.
skills_catalog_json_str: String,
orchestration_model: String,
max_token_length: usize,
}
@ -151,10 +188,27 @@ impl OrchestratorModelV1 {
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
orchestration_model: String,
max_token_length: usize,
) -> Self {
Self::with_skills(
agent_orchestrations,
Vec::new(),
orchestration_model,
max_token_length,
)
}
/// Like `new`, but additionally seeds the orchestrator with an Agent
/// Skills catalog. When `skills_catalog` is empty the orchestrator uses
/// the routes-only system prompt; otherwise it asks the model to also
/// pick zero or more skills from the catalog.
pub fn with_skills(
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
skills_catalog: Vec<SkillRef>,
orchestration_model: String,
max_token_length: usize,
) -> Self {
let agent_orchestration_values: Vec<OrchestrationPreference> =
agent_orchestrations.values().flatten().cloned().collect();
// Format routes: each route as JSON on its own line with standard spacing
let agent_orchestration_json_str = agent_orchestration_values
.iter()
.map(to_spaced_json)
@ -165,19 +219,48 @@ impl OrchestratorModelV1 {
.flat_map(|(model, prefs)| prefs.iter().map(|pref| (pref.name.clone(), model.clone())))
.collect();
let skills_catalog_json_str = render_skills_catalog(&skills_catalog);
OrchestratorModelV1 {
orchestration_model,
max_token_length,
agent_orchestration_json_str,
agent_orchestration_to_model_map,
skills_catalog_json_str,
}
}
}
/// JSON shape suitable for the `<skills>` block in the orchestrator prompt:
/// `{"name": "...", "description": "..."}`. Only metadata that helps the
/// orchestrator pick a skill is included; the full SKILL.md body is injected
/// separately, after a skill has been selected.
#[derive(Debug, Clone, Serialize)]
struct SkillCatalogEntry<'a> {
name: &'a str,
description: &'a str,
}
fn render_skills_catalog(skills: &[SkillRef]) -> String {
skills
.iter()
.map(|s| SkillCatalogEntry {
name: &s.name,
description: s.catalog_description(),
})
.map(|entry| to_spaced_json(&entry))
.collect::<Vec<String>>()
.join("\n")
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AgentOrchestratorResponse {
/// The route field now expects an array of route names: ["route_name_1", "route_name_2", ...]
/// The route field expects an array of route names: ["route_name_1", "route_name_2", ...].
pub route: Option<Vec<String>>,
/// Optional array of Agent Skill names the orchestrator chose to activate.
/// Absent or empty when no skills should be loaded for this turn.
#[serde(default)]
pub skills: Option<Vec<String>>,
}
const TOKEN_LENGTH_DIVISOR: usize = 4; // Approximate token length divisor for UTF-8 characters
@ -209,7 +292,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
// Ensure the conversation does not exceed the configured token budget.
// We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
// avoid running a real tokenizer on the hot path.
let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
let template_len = if self.skills_catalog_json_str.is_empty() {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
} else {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS.len()
+ self.skills_catalog_json_str.len()
};
let mut token_count = template_len / TOKEN_LENGTH_DIVISOR;
let mut selected_messages_list_reversed: Vec<Message> = vec![];
for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
let message_text = message.content.extract_text();
@ -289,14 +378,16 @@ impl OrchestratorModel for OrchestratorModelV1 {
// Generate the orchestrator request message based on the usage preferences.
// If preferences are passed in request then we use them;
// Otherwise, we use the default orchestration modelpreferences.
let orchestrator_message =
match convert_to_orchestrator_preferences(usage_preferences_from_request) {
Some(prefs) => generate_orchestrator_message(&prefs, &selected_conversation_list),
None => generate_orchestrator_message(
&self.agent_orchestration_json_str,
&selected_conversation_list,
),
};
let routes_block = match convert_to_orchestrator_preferences(usage_preferences_from_request)
{
Some(prefs) => prefs,
None => self.agent_orchestration_json_str.clone(),
};
let orchestrator_message = generate_orchestrator_message(
&routes_block,
&self.skills_catalog_json_str,
&selected_conversation_list,
);
ChatCompletionsRequest {
model: self.orchestration_model.clone(),
@ -316,7 +407,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
&self,
content: &str,
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> Result<Option<Vec<(String, String)>>> {
) -> Result<Option<OrchestratorSelection>> {
if content.is_empty() {
return Ok(None);
}
@ -326,20 +417,21 @@ impl OrchestratorModel for OrchestratorModelV1 {
let selected_routes = orchestrator_response.route.unwrap_or_default();
// Filter out empty routes
let valid_routes: Vec<String> = selected_routes
.into_iter()
.filter(|route| !route.is_empty())
.collect();
if valid_routes.is_empty() {
return Ok(None);
}
let selected_skills: Vec<String> = orchestrator_response
.skills
.unwrap_or_default()
.into_iter()
.filter(|s| !s.is_empty())
.collect();
let mut result: Vec<(String, String)> = Vec::new();
let mut routes: Vec<(String, String)> = Vec::new();
if let Some(usage_preferences) = usage_preferences {
// If usage preferences are defined, we need to find the model that matches each selected route
for selected_route in valid_routes {
let model_name: Option<String> = usage_preferences
.iter()
@ -351,7 +443,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
.map(|pref| pref.model.clone());
if let Some(model_name) = model_name {
result.push((selected_route, model_name));
routes.push((selected_route, model_name));
} else {
warn!(
route = %selected_route,
@ -361,14 +453,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
} else {
// If no usage preferences are passed in request then use the default orchestration model preferences
for selected_route in valid_routes {
if let Some(model) = self
.agent_orchestration_to_model_map
.get(&selected_route)
.cloned()
{
result.push((selected_route, model));
routes.push((selected_route, model));
} else {
warn!(
route = %selected_route,
@ -379,11 +470,14 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
if result.is_empty() {
if routes.is_empty() && selected_skills.is_empty() {
return Ok(None);
}
Ok(Some(result))
Ok(Some(OrchestratorSelection {
routes,
skills: selected_skills,
}))
}
fn get_model_name(&self) -> String {
@ -391,7 +485,11 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
fn generate_orchestrator_message(
prefs: &str,
skills_catalog: &str,
selected_conversation_list: &Vec<Message>,
) -> String {
// Format conversation with 4-space indentation (equivalent to Python's json.dumps(obj, indent=4))
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut conversation_buf = Vec::new();
@ -399,9 +497,19 @@ fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<M
SerializeTrait::serialize(&selected_conversation_list, &mut serializer).unwrap();
let conversation_json = String::from_utf8(conversation_buf).unwrap_or_default();
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
let template = if skills_catalog.is_empty() {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
} else {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS
};
let mut out = template
.replace("{routes}", prefs)
.replace("{conversation}", &conversation_json)
.replace("{conversation}", &conversation_json);
if !skills_catalog.is_empty() {
out = out.replace("{skills}", skills_catalog);
}
out
}
fn convert_to_orchestrator_preferences(
@ -1349,23 +1457,30 @@ If no routes are needed, return an empty list for `route`.
let orchestrator =
OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), 2000);
fn routes(pairs: &[(&str, &str)]) -> OrchestratorSelection {
OrchestratorSelection {
routes: pairs
.iter()
.map(|(r, m)| (r.to_string(), m.to_string()))
.collect(),
skills: Vec::new(),
}
}
// Case 1: Valid JSON with single route in array
let input = r#"{"route": ["Image generation"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
);
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
// Case 2: Valid JSON with multiple routes in array
let input = r#"{"route": ["Image generation", "Code generation"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![
("Image generation".to_string(), "gpt-4o".to_string()),
("Code generation".to_string(), "gpt-4o".to_string())
])
Some(routes(&[
("Image generation", "gpt-4o"),
("Code generation", "gpt-4o")
]))
);
// Case 3: Valid JSON with empty array
@ -1396,14 +1511,65 @@ If no routes are needed, return an empty list for `route`.
// Case 7: Single quotes and \n in JSON
let input = "{'route': ['Image generation']}\\n";
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
);
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
// Case 8: Array with unknown route (not in orchestrations map)
let input = r#"{"route": ["Unknown route"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(result, None);
// Case 9: Routes plus selected skills are propagated through.
let input = r#"{"route": ["Image generation"], "skills": ["pdf-processing"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
assert_eq!(result.routes.len(), 1);
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
// Case 10: Skills-only selection (no routes) still surfaces as Some.
let input = r#"{"route": [], "skills": ["pdf-processing"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
assert!(result.routes.is_empty());
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
}
#[test]
fn test_system_prompt_with_skills_block() {
let orchestrator = OrchestratorModelV1::with_skills(
HashMap::from([(
"gpt-4o".to_string(),
vec![OrchestrationPreference {
name: "Image generation".to_string(),
description: "generating image".to_string(),
}],
)]),
vec![SkillRef {
name: "pdf-processing".to_string(),
description: "Extract structured data from PDFs.".to_string(),
path: None,
base_dir: None,
body: None,
scope: None,
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}],
"test-model".to_string(),
usize::MAX,
);
let conversation: Vec<Message> = serde_json::from_str(
r#"[{"role": "user", "content": "extract the invoice totals from this pdf"}]"#,
)
.unwrap();
let req = orchestrator.generate_request(&conversation, &None);
let prompt = req.messages[0].content.extract_text();
assert!(prompt.contains("<skills>"));
assert!(prompt.contains("</skills>"));
assert!(prompt.contains(r#""name": "pdf-processing""#));
assert!(prompt.contains("Extract structured data from PDFs."));
// Response format documentation must mention the `skills` array
// so the orchestrator emits it.
assert!(prompt.contains(r#""skills""#));
}
}

View file

@ -33,6 +33,7 @@ mod tests {
"openai/gpt-4o".to_string(),
"openai/gpt-4o-mini".to_string(),
],
skills: None,
selection_policy: SelectionPolicy {
prefer: SelectionPreference::None,
},
@ -44,6 +45,7 @@ mod tests {
"anthropic/claude-3-sonnet".to_string(),
"openai/gpt-4o-mini".to_string(),
],
skills: None,
selection_policy: SelectionPolicy {
prefer: SelectionPreference::None,
},

View file

@ -163,6 +163,12 @@ pub struct TopLevelRoutingPreference {
pub name: String,
pub description: String,
pub models: Vec<String>,
/// Agent Skills associated with this route. When Plano-Orchestrator
/// selects this route, every skill listed here is also offered to the
/// orchestrator in the `<skills>` block; selected skills have their
/// SKILL.md body prepended to the upstream system prompt.
#[serde(default)]
pub skills: Option<Vec<String>>,
#[serde(default)]
pub selection_policy: SelectionPolicy,
}
@ -224,6 +230,17 @@ pub struct Configuration {
pub state_storage: Option<StateStorageConfig>,
pub routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
pub model_metrics_sources: Option<Vec<MetricsSource>>,
/// Agent Skills (https://agentskills.io) installed for this project.
///
/// The Plano CLI discovers `.plano/skills/<name>/SKILL.md` files at render
/// time and materializes them into this list with `body` already loaded so
/// downstream consumers do not need filesystem access. Skills are scoped
/// to specific routes via `routing_preferences[].skills`; Plano-Orchestrator
/// receives a `<skills>` block alongside `<routes>` for any skills attached
/// to candidate routes, and selected skills have their SKILL.md body
/// injected into the upstream system prompt.
#[serde(default)]
pub skills: Option<Vec<SkillRef>>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -611,6 +628,45 @@ pub struct PromptTarget {
pub auto_llm_dispatch_on_response: Option<bool>,
}
/// An Agent Skill (https://agentskills.io) as materialized by the Plano CLI.
///
/// At runtime brightstaff and the WASM filters reason over the catalog
/// (`name` + `description`) and, when a skill is selected, inject the
/// pre-loaded `body` into the downstream system prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SkillRef {
pub name: String,
pub description: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_dir: Option<String>,
/// Full SKILL.md markdown body (post-frontmatter). Inlined here at render
/// time so the WASM sandbox does not need filesystem access.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub body: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scope: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub compatibility: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub license: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<HashMap<String, String>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub allowed_tools: Option<String>,
}
impl SkillRef {
/// Best-effort short summary suitable for the `<skills>` block sent to
/// Plano-Orchestrator: only the public-facing description, never the
/// full SKILL.md body. The body is injected separately, after a skill
/// has been selected.
pub fn catalog_description(&self) -> &str {
&self.description
}
}
// convert PromptTarget to ChatCompletionTool
impl From<&PromptTarget> for ChatCompletionTool {
fn from(val: &PromptTarget) -> Self {
@ -807,4 +863,34 @@ disable_signals: false
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
assert_eq!(overrides.disable_signals, None);
}
#[test]
fn test_top_level_routing_preference_skills_deserialize() {
let yaml = r#"
name: code review
description: reviewing, analyzing, and suggesting improvements to existing code
models:
- openai/gpt-4o
skills:
- code-review-skill
"#;
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
assert_eq!(pref.name, "code review");
assert_eq!(
pref.skills.as_deref(),
Some(&["code-review-skill".to_string()][..])
);
}
#[test]
fn test_top_level_routing_preference_skills_optional() {
let yaml = r#"
name: code generation
description: generating new code
models:
- openai/gpt-4o
"#;
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
assert!(pref.skills.is_none());
}
}

View file

@ -8,6 +8,7 @@ pub mod path;
pub mod pii;
pub mod ratelimit;
pub mod routing;
pub mod skills_runtime;
pub mod stats;
pub mod tokenizer;
pub mod traces;

View file

@ -0,0 +1,215 @@
//! Runtime helpers for handling Agent Skills selected by Plano-Orchestrator.
//!
//! These functions live in `common` (rather than `brightstaff` or a WASM
//! crate) so they can be unit-tested on the native target without dragging
//! in proxy-wasm host-call symbols or tokio runtime dependencies.
use crate::configuration::{SkillRef, TopLevelRoutingPreference};
/// Filter `skills` down to the subset attached to `route_name` via
/// `routing_preferences[].skills`. When the selected route has no `skills:`
/// list, returns an empty vector — skills are scoped to routes, not global.
///
/// `routing_preferences` is the source of truth for which skills are even
/// eligible for the orchestrator to activate on a given route.
pub fn skills_for_route<'a>(
skills: &'a [SkillRef],
routing_preferences: &[TopLevelRoutingPreference],
route_name: &str,
) -> Vec<&'a SkillRef> {
let Some(route) = routing_preferences.iter().find(|p| p.name == route_name) else {
return Vec::new();
};
let Some(allow) = route.skills.as_ref() else {
return Vec::new();
};
let mut out: Vec<&SkillRef> = Vec::with_capacity(allow.len());
for name in allow {
if let Some(skill) = skills.iter().find(|s| &s.name == name) {
out.push(skill);
}
}
out
}
/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s.
/// Unknown names are dropped silently — the orchestrator can hallucinate.
/// Results are deduplicated by name, preserving the order Plano-Orchestrator
/// returned.
pub fn resolve_selected_skills<'a>(
skills: &'a [SkillRef],
selected_names: &[String],
) -> Vec<&'a SkillRef> {
let mut out: Vec<&SkillRef> = Vec::with_capacity(selected_names.len());
for name in selected_names {
if out.iter().any(|s| &s.name == name) {
continue;
}
if let Some(skill) = skills.iter().find(|s| &s.name == name) {
out.push(skill);
}
}
out
}
/// Append the bodies of activated skills to a system prompt, wrapped in
/// `<skill_content name="...">` tags so a future context-management pass can
/// recognize and recompact them.
///
/// Returns `None` only if no base system prompt was supplied and no skills
/// were activated. When skills are present the wrapper text always appears so
/// the downstream model receives a clear, well-structured instruction block.
pub fn augment_system_prompt_with_skills(
base_system_prompt: Option<String>,
activated_skills: &[&SkillRef],
) -> Option<String> {
if activated_skills.is_empty() {
return base_system_prompt;
}
let mut buf = String::new();
if let Some(base) = base_system_prompt {
if !base.is_empty() {
buf.push_str(&base);
buf.push('\n');
buf.push('\n');
}
}
buf.push_str(
"The following Agent Skills have been activated for this request. \
Follow their instructions when relevant; resolve relative paths \
against each skill's base directory.\n\n",
);
for skill in activated_skills {
buf.push_str(&format!("<skill_content name=\"{}\"", skill.name));
if let Some(base_dir) = skill.base_dir.as_deref() {
buf.push_str(&format!(" base_dir=\"{}\"", base_dir));
}
buf.push_str(">\n");
if let Some(body) = skill.body.as_deref() {
buf.push_str(body.trim_end());
buf.push('\n');
} else {
buf.push_str(&format!("(skill description) {}\n", skill.description));
}
buf.push_str("</skill_content>\n\n");
}
Some(buf.trim_end().to_string())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::configuration::SelectionPolicy;
fn skill(name: &str, body: &str) -> SkillRef {
SkillRef {
name: name.to_string(),
description: format!("desc for {}", name),
path: Some(format!("/skills/{}/SKILL.md", name)),
base_dir: Some(format!("/skills/{}", name)),
body: Some(body.to_string()),
scope: Some("project".to_string()),
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}
}
fn route(name: &str, skill_names: Option<Vec<&str>>) -> TopLevelRoutingPreference {
TopLevelRoutingPreference {
name: name.to_string(),
description: format!("desc for {}", name),
models: vec!["openai/gpt-4o".to_string()],
skills: skill_names.map(|v| v.into_iter().map(String::from).collect()),
selection_policy: SelectionPolicy::default(),
}
}
#[test]
fn skills_for_route_returns_attached_skills() {
let catalog = vec![
skill("pdf-processing", "extract"),
skill("code-review", "review"),
];
let routes = vec![
route("code review", Some(vec!["code-review"])),
route("doc work", Some(vec!["pdf-processing"])),
];
let resolved = skills_for_route(&catalog, &routes, "code review");
assert_eq!(resolved.len(), 1);
assert_eq!(resolved[0].name, "code-review");
}
#[test]
fn skills_for_route_empty_when_route_has_no_skills_list() {
let catalog = vec![skill("pdf-processing", "extract")];
let routes = vec![route("code review", None)];
let resolved = skills_for_route(&catalog, &routes, "code review");
assert!(resolved.is_empty());
}
#[test]
fn skills_for_route_empty_when_route_missing() {
let catalog = vec![skill("pdf-processing", "extract")];
let routes = vec![route("code review", Some(vec!["pdf-processing"]))];
let resolved = skills_for_route(&catalog, &routes, "no-such-route");
assert!(resolved.is_empty());
}
#[test]
fn skills_for_route_drops_unknown_skill_names() {
let catalog = vec![skill("pdf-processing", "extract")];
let routes = vec![route(
"code review",
Some(vec!["pdf-processing", "ghost-skill"]),
)];
let resolved = skills_for_route(&catalog, &routes, "code review");
assert_eq!(resolved.len(), 1);
assert_eq!(resolved[0].name, "pdf-processing");
}
#[test]
fn resolve_selected_skills_drops_unknown_and_dedupes() {
let catalog = vec![
skill("pdf-processing", "extract"),
skill("code-review", "review"),
];
let names = vec![
"code-review".to_string(),
"ghost".to_string(),
"code-review".to_string(),
"pdf-processing".to_string(),
];
let resolved = resolve_selected_skills(&catalog, &names);
assert_eq!(resolved.len(), 2);
assert_eq!(resolved[0].name, "code-review");
assert_eq!(resolved[1].name, "pdf-processing");
}
#[test]
fn augment_passthrough_with_no_skills() {
let augmented = augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[]);
assert_eq!(augmented.as_deref(), Some("you are helpful"));
}
#[test]
fn augment_includes_skill_bodies() {
let s = skill("pdf-processing", "extract text and tables");
let augmented =
augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[&s])
.expect("augmented");
assert!(augmented.starts_with("you are helpful"));
assert!(augmented.contains("<skill_content name=\"pdf-processing\""));
assert!(augmented.contains("extract text and tables"));
assert!(augmented.contains("base_dir=\"/skills/pdf-processing\""));
}
#[test]
fn augment_without_base_prompt_still_works() {
let s = skill("code-review", "look at diffs");
let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
assert!(augmented.contains("<skill_content name=\"code-review\""));
assert!(augmented.contains("look at diffs"));
}
}

View file

@ -209,19 +209,15 @@ impl StreamContext {
} else {
info!("no default prompt target found, forwarding request to upstream llm");
let mut messages = Vec::new();
// add system prompt
match self.system_prompt.as_ref() {
None => {}
Some(system_prompt) => {
let system_prompt_message = Message {
role: SYSTEM_ROLE.to_string(),
content: Some(ContentType::Text(system_prompt.clone())),
model: None,
tool_calls: None,
tool_call_id: None,
};
messages.push(system_prompt_message);
}
if let Some(system_prompt) = self.system_prompt.as_ref().clone() {
let system_prompt_message = Message {
role: SYSTEM_ROLE.to_string(),
content: Some(ContentType::Text(system_prompt)),
model: None,
tool_calls: None,
tool_call_id: None,
};
messages.push(system_prompt_message);
}
messages.append(