This commit is contained in:
Musa 2026-06-03 10:10:11 -07:00 committed by GitHub
commit a24710dfaf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 3278 additions and 98 deletions

View file

@ -132,11 +132,15 @@ impl AgentSelector {
.determine_orchestration(messages, Some(usage_preferences), request_id)
.await
{
Ok(Some(routes)) => {
debug!(count = routes.len(), "determined agents via orchestration");
Ok(Some(selection)) => {
debug!(
count = selection.routes.len(),
skill_count = selection.skills.len(),
"determined agents via orchestration"
);
let mut selected_agents = Vec::new();
for (route_name, agent_name) in routes {
for (route_name, agent_name) in selection.routes {
debug!(route = %route_name, agent = %agent_name, "processing route");
let selected_agent = agents
.iter()

View file

@ -37,7 +37,7 @@ use crate::tracing::{
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
plano as tracing_plano, set_service_name,
};
use model_selection::router_chat_get_upstream_model;
use model_selection::{inject_activated_skills_into_request, router_chat_get_upstream_model};
const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";
@ -282,26 +282,16 @@ async fn llm_chat_inner(
Err(response) => return Ok(response),
};
// Serialize request for upstream BEFORE router consumes it
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
// --- Phase 3: Route the request (or use pinned model from session cache) ---
let resolved_model = if let Some(cached_model) = pinned_model {
// Route the request (or use pinned model from session cache) BEFORE
// serializing for upstream — skill body injection happens here, so the
// upstream bytes must be produced after routing returns.
let (resolved_model, activated_skills) = if let Some(cached_model) = pinned_model {
info!(
session_id = %session_id.as_deref().unwrap_or(""),
model = %cached_model,
"using pinned routing decision from cache"
);
cached_model
(cached_model, Vec::new())
} else {
let routing_span = info_span!(
"routing",
@ -313,11 +303,16 @@ async fn llm_chat_inner(
route.selected_model = tracing::field::Empty,
routing.determination_ms = tracing::field::Empty,
);
// The router consumes the request (it converts it to OpenAI format
// internally to extract conversation messages). Clone so we can
// still mutate the original below when Plano-Orchestrator activates
// any Agent Skills.
let request_for_routing = client_request.clone();
let routing_result = match async {
set_service_name(operation_component::ROUTING);
router_chat_get_upstream_model(
Arc::clone(&state.orchestrator_service),
client_request,
request_for_routing,
&request_path,
&request_id,
inline_routing_preferences,
@ -335,8 +330,11 @@ async fn llm_chat_inner(
}
};
let (router_selected_model, route_name) =
(routing_result.model_name, routing_result.route_name);
let (router_selected_model, route_name, activated) = (
routing_result.model_name,
routing_result.route_name,
routing_result.activated_skills,
);
let model = if router_selected_model != "none" {
router_selected_model
} else {
@ -362,10 +360,34 @@ async fn llm_chat_inner(
.await;
}
model
(model, activated)
};
tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str());
// If Plano-Orchestrator activated any Agent Skills for this route, inject
// their SKILL.md bodies into the system prompt before we hand the bytes
// off to the upstream provider.
if !activated_skills.is_empty() {
info!(
count = activated_skills.len(),
skills = ?activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>(),
"injecting activated Agent Skills into upstream system prompt"
);
inject_activated_skills_into_request(&mut client_request, &activated_skills);
}
// Serialize request for upstream AFTER potential skill injection.
let client_request_bytes_for_upstream: Bytes =
match ProviderRequestType::to_bytes(&client_request) {
Ok(bytes) => bytes.into(),
Err(err) => {
warn!(error = %err, "failed to serialize request for upstream");
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return Ok(r);
}
};
// --- Phase 4: Forward to upstream and stream back ---
send_upstream(
&state.http_client,

View file

@ -1,5 +1,9 @@
use common::configuration::TopLevelRoutingPreference;
use common::configuration::{SkillRef, TopLevelRoutingPreference};
use common::skills_runtime::augment_system_prompt_with_skills;
use hermesllm::apis::openai::{Message, MessageContent, Role};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::providers::request::ProviderRequest;
use hermesllm::transforms::lib::ExtractText;
use hermesllm::ProviderRequestType;
use hyper::StatusCode;
use std::sync::Arc;
@ -29,6 +33,10 @@ pub struct RoutingResult {
/// Full ranked list — use subsequent entries as fallbacks on 429/5xx.
pub models: Vec<String>,
pub route_name: Option<String>,
/// Agent Skills activated by Plano-Orchestrator for this request.
/// Their `body` field (the SKILL.md content) is prepended to the
/// upstream system prompt by the caller in `send_upstream`.
pub activated_skills: Vec<SkillRef>,
}
pub struct RoutingError {
@ -128,8 +136,36 @@ pub async fn router_chat_get_upstream_model(
match routing_result {
Ok(route) => match route {
Some((route_name, ranked_models)) => {
let model_name = ranked_models.first().cloned().unwrap_or_default();
Some(decision) => {
// Skills-only decision (no route, no models) -> fall through
// to the "none" sentinel so the original / aliased model is
// used, but propagate activated_skills so they still get
// injected. Documented at docs/source/resources/skills.rst.
if decision.route_name.is_none() && decision.models.is_empty() {
current_span.record("route.selected_model", "none");
info!(
skills = ?decision
.activated_skills
.iter()
.map(|s| s.name.as_str())
.collect::<Vec<_>>(),
"no route determined; activating skills against default model"
);
bs_metrics::record_router_decision(
route_label,
"none",
true,
determination_elapsed,
);
return Ok(RoutingResult {
model_name: "none".to_string(),
models: vec!["none".to_string()],
route_name: None,
activated_skills: decision.activated_skills,
});
}
let model_name = decision.models.first().cloned().unwrap_or_default();
current_span.record("route.selected_model", model_name.as_str());
bs_metrics::record_router_decision(
route_label,
@ -139,8 +175,9 @@ pub async fn router_chat_get_upstream_model(
);
Ok(RoutingResult {
model_name,
models: ranked_models,
route_name: Some(route_name),
models: decision.models,
route_name: decision.route_name,
activated_skills: decision.activated_skills,
})
}
None => {
@ -159,6 +196,7 @@ pub async fn router_chat_get_upstream_model(
model_name: "none".to_string(),
models: vec!["none".to_string()],
route_name: None,
activated_skills: Vec::new(),
})
}
},
@ -172,3 +210,250 @@ pub async fn router_chat_get_upstream_model(
}
}
}
/// Prepend the bodies of `activated_skills` to the system prompt of the
/// upstream request so the chosen LLM has access to each skill's instructions.
/// Works across every provider variant by going through the OpenAI message
/// shape (`get_messages` / `set_messages`).
///
/// # Behavior contract
///
/// * **No-op** when `activated_skills` is empty.
/// * **Augments the first system message in place** when one is present at
/// any position in `messages` (typically index 0, but we look for the
/// first `Role::System` rather than assuming). Subsequent system messages
/// (rare but legal for some providers) are left untouched. We pick "first"
/// so the skill content appears as early in the prompt as possible —
/// models weight earlier system content more heavily and an Anthropic
/// tools+system combo is conventionally a single leading block.
/// * **Inserts a new leading system message** at index 0 when no system
/// message exists in the request.
/// * **Flattens `MessageContent::Parts` system content to a single
/// `MessageContent::Text`** when extracting the base prompt. This is
/// intentional: every supported upstream API accepts text in system
/// messages, and the alternative — preserving each `ContentPart` and
/// appending a new text part — fails on providers that disallow
/// multi-part system content. The trade-off is that non-text system parts
/// (e.g. images attached to a system message, which no production
/// provider supports anyway) are dropped on the floor. Verified by
/// `flattens_parts_system_content` below.
pub fn inject_activated_skills_into_request(
client_request: &mut ProviderRequestType,
activated_skills: &[SkillRef],
) {
if activated_skills.is_empty() {
return;
}
let skill_refs: Vec<&SkillRef> = activated_skills.iter().collect();
let mut messages = client_request.get_messages();
let (system_idx, base_text) = match messages.iter().position(|m| m.role == Role::System) {
Some(idx) => {
let text = messages[idx]
.content
.as_ref()
.map(|c| c.extract_text())
.unwrap_or_default();
(Some(idx), Some(text))
}
None => (None, None),
};
let augmented = augment_system_prompt_with_skills(base_text, &skill_refs);
let Some(augmented_text) = augmented else {
return;
};
match system_idx {
Some(idx) => {
messages[idx].content = Some(MessageContent::Text(augmented_text));
}
None => {
messages.insert(
0,
Message {
role: Role::System,
content: Some(MessageContent::Text(augmented_text)),
name: None,
tool_calls: None,
tool_call_id: None,
},
);
}
}
client_request.set_messages(&messages);
}
#[cfg(test)]
mod tests {
use super::*;
use hermesllm::apis::openai::{ChatCompletionsRequest, ContentPart};
fn req_with_messages(msgs: Vec<Message>) -> ProviderRequestType {
ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest {
model: "test".to_string(),
messages: msgs,
..Default::default()
})
}
fn user_msg(text: &str) -> Message {
Message {
role: Role::User,
content: Some(MessageContent::Text(text.to_string())),
name: None,
tool_calls: None,
tool_call_id: None,
}
}
fn system_msg(text: &str) -> Message {
Message {
role: Role::System,
content: Some(MessageContent::Text(text.to_string())),
name: None,
tool_calls: None,
tool_call_id: None,
}
}
fn skill(name: &str, body: &str) -> SkillRef {
SkillRef {
name: name.to_string(),
description: format!("desc for {name}"),
path: None,
base_dir: None,
body: Some(body.to_string()),
scope: Some("project".to_string()),
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}
}
fn first_system_text(req: &ProviderRequestType) -> String {
req.get_messages()
.iter()
.find(|m| m.role == Role::System)
.and_then(|m| m.content.as_ref())
.map(|c| c.extract_text())
.unwrap_or_default()
}
#[test]
fn injects_new_system_message_when_none_present() {
let mut req = req_with_messages(vec![user_msg("hi")]);
inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
let messages = req.get_messages();
assert_eq!(messages.len(), 2);
assert_eq!(messages[0].role, Role::System);
let txt = first_system_text(&req);
assert!(txt.contains("<skill_content name=\"pdf\""));
assert!(txt.contains("process pdfs"));
}
#[test]
fn augments_existing_system_message_in_place_preserving_user_message() {
let mut req = req_with_messages(vec![system_msg("you are helpful"), user_msg("hi")]);
inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
let messages = req.get_messages();
assert_eq!(messages.len(), 2);
let txt = first_system_text(&req);
assert!(txt.starts_with("you are helpful"));
assert!(txt.contains("<skill_content name=\"pdf\""));
assert_eq!(messages[1].role, Role::User);
}
#[test]
fn noop_when_no_skills_activated() {
let mut req = req_with_messages(vec![system_msg("base"), user_msg("hi")]);
let before = req.get_messages();
inject_activated_skills_into_request(&mut req, &[]);
let after = req.get_messages();
assert_eq!(after.len(), before.len());
assert_eq!(first_system_text(&req), "base");
}
#[test]
fn augments_only_the_first_system_message() {
// Two system messages (rare in practice). Only the first one is
// augmented; the trailing one is left untouched. Documented contract.
let mut req = req_with_messages(vec![
system_msg("primary"),
system_msg("secondary"),
user_msg("hi"),
]);
inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
let messages = req.get_messages();
assert!(messages[0]
.content
.as_ref()
.unwrap()
.extract_text()
.contains("primary"));
assert!(messages[0]
.content
.as_ref()
.unwrap()
.extract_text()
.contains("<skill_content"));
assert_eq!(
messages[1].content.as_ref().unwrap().extract_text(),
"secondary"
);
}
#[test]
fn flattens_parts_system_content() {
// Documented behavior: `MessageContent::Parts` system content is
// extracted to plain text via ExtractText and re-emitted as
// `MessageContent::Text`. Non-text parts (e.g. images) are dropped
// — no production provider ships images in a system message.
let parts_system = Message {
role: Role::System,
content: Some(MessageContent::Parts(vec![
ContentPart::Text {
text: "be brief".to_string(),
},
ContentPart::Text {
text: " and polite".to_string(),
},
])),
name: None,
tool_calls: None,
tool_call_id: None,
};
let mut req = req_with_messages(vec![parts_system, user_msg("hi")]);
inject_activated_skills_into_request(&mut req, &[skill("pdf", "body")]);
let messages = req.get_messages();
let system = &messages[0];
match system.content.as_ref().unwrap() {
MessageContent::Text(t) => {
assert!(t.contains("be brief"));
assert!(t.contains(" and polite"));
assert!(t.contains("<skill_content name=\"pdf\""));
}
MessageContent::Parts(_) => panic!("expected flattened text, got Parts"),
}
}
#[test]
fn injects_in_orchestrator_order_for_multiple_skills() {
let mut req = req_with_messages(vec![user_msg("hi")]);
inject_activated_skills_into_request(
&mut req,
&[skill("first", "alpha-body"), skill("second", "beta-body")],
);
let txt = first_system_text(&req);
let first_pos = txt.find("alpha-body").expect("first skill body present");
let second_pos = txt.find("beta-body").expect("second skill body present");
assert!(
first_pos < second_pos,
"skills should appear in the order they were activated"
);
}
}

View file

@ -308,11 +308,12 @@ async fn init_app_state(
.orchestrator_model_context_length
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
let orchestrator_service = Arc::new(OrchestratorService::with_routing_and_skills(
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
orchestrator_model_name,
orchestrator_llm_provider,
config.routing_preferences.clone(),
config.skills.clone(),
metrics_service,
session_ttl_seconds,
session_cache,

View file

@ -1,19 +1,22 @@
use std::{borrow::Cow, collections::HashMap, sync::Arc, time::Duration};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference, TopLevelRoutingPreference},
configuration::{
AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference,
},
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
skills_runtime::{referenced_skills_catalog, resolve_for_route, resolve_selected_skills},
};
use hermesllm::apis::openai::Message;
use hyper::header;
use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use thiserror::Error;
use tracing::{debug, info};
use tracing::{debug, info, warn};
use super::http::{self, post_and_extract_content};
use super::model_metrics::ModelMetricsService;
use super::orchestrator_model::OrchestratorModel;
use super::orchestrator_model::{OrchestratorModel, OrchestratorSelection};
use crate::metrics as bs_metrics;
use crate::metrics::labels as metric_labels;
@ -30,12 +33,40 @@ pub struct OrchestratorService {
orchestrator_model: Arc<dyn OrchestratorModel>,
orchestrator_provider_name: String,
top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
/// Agent Skills catalog (deduplicated by name) attached to any
/// `routing_preferences[].skills` list. Empty when no route has skills.
skills_catalog: Vec<SkillRef>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_cache: Option<Arc<dyn SessionCache>>,
session_ttl: Duration,
tenant_header: Option<String>,
}
/// Result of `determine_route`: which route was picked (if any), the
/// ranked candidate models for that route, and the Agent Skill bodies the
/// orchestrator chose to activate alongside it.
///
/// Two valid shapes:
///
/// * **Route + skills (typical):** `route_name = Some(...)`, `models`
/// non-empty, `activated_skills` may be non-empty. Skills are resolved
/// against `routing_preferences[<route>].skills`, so picks that aren't
/// allow-listed for the route are dropped with a `warn!`.
/// * **Skills-only:** `route_name = None`, `models` empty,
/// `activated_skills` non-empty. The orchestrator decided no route
/// needed to change but the user's intent matches one or more skills.
/// Per `docs/source/resources/skills.rst`, the request falls back to the
/// originally-requested model and the skill bodies are injected the
/// same way. Allow-list filtering uses the catalog union (effectively
/// the catalog itself, which is pre-filtered to skills referenced by
/// some route).
#[derive(Debug, Clone, Default)]
pub struct RouteDecision {
pub route_name: Option<String>,
pub models: Vec<String>,
pub activated_skills: Vec<SkillRef>,
}
#[derive(Debug, Error)]
pub enum OrchestrationError {
#[error(transparent)]
@ -66,6 +97,7 @@ impl OrchestratorService {
orchestrator_model,
orchestrator_provider_name,
top_level_preferences: HashMap::new(),
skills_catalog: Vec::new(),
metrics_service: None,
session_cache: None,
session_ttl: Duration::from_secs(DEFAULT_SESSION_TTL_SECONDS),
@ -84,14 +116,53 @@ impl OrchestratorService {
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
Self::with_routing_and_skills(
orchestrator_url,
orchestration_model_name,
orchestrator_provider_name,
top_level_prefs,
None,
metrics_service,
session_ttl_seconds,
session_cache,
tenant_header,
max_token_length,
)
}
/// Like `with_routing`, but also seeds the orchestrator with a catalog of
/// Agent Skills referenced by `routing_preferences[].skills`. The
/// orchestrator gets a `<skills>` block in its system prompt and may
/// select zero or more skills alongside the picked route; this enables
/// the LLM handler to inject the chosen SKILL.md bodies into the
/// upstream request.
#[allow(clippy::too_many_arguments)]
pub fn with_routing_and_skills(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
skills_catalog: Option<Vec<SkillRef>>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_ttl_seconds: Option<u64>,
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
.map_or_else(HashMap::new, |prefs| {
prefs.into_iter().map(|p| (p.name.clone(), p)).collect()
});
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
let skills_catalog = referenced_skills_catalog(
skills_catalog.as_deref().unwrap_or(&[]),
&top_level_preferences,
);
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::with_skills(
HashMap::new(),
skills_catalog.clone(),
orchestration_model_name,
max_token_length,
));
@ -105,6 +176,7 @@ impl OrchestratorService {
orchestrator_model,
orchestrator_provider_name,
top_level_preferences,
skills_catalog,
metrics_service,
session_cache: Some(session_cache),
session_ttl,
@ -170,7 +242,7 @@ impl OrchestratorService {
messages: &[Message],
inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
request_id: &str,
) -> Result<Option<(String, Vec<String>)>> {
) -> Result<Option<RouteDecision>> {
if messages.is_empty() {
return Ok(None);
}
@ -206,9 +278,13 @@ impl OrchestratorService {
)
.await?;
let result = if let Some(ref routes) = orchestration_result {
if routes.len() > 1 {
let all_routes: Vec<&str> = routes.iter().map(|(name, _)| name.as_str()).collect();
let result = if let Some(ref selection) = orchestration_result {
if selection.routes.len() > 1 {
let all_routes: Vec<&str> = selection
.routes
.iter()
.map(|(name, _)| name.as_str())
.collect();
info!(
routes = ?all_routes,
using = %all_routes.first().unwrap_or(&"none"),
@ -216,7 +292,8 @@ impl OrchestratorService {
);
}
if let Some((route_name, _)) = routes.first() {
if let Some((route_name, _)) = selection.routes.first() {
// Route + (optional) skills path.
let top_pref = inline_top_map
.as_ref()
.and_then(|m| m.get(route_name))
@ -227,10 +304,43 @@ impl OrchestratorService {
Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await,
None => pref.models.clone(),
};
Some((route_name.clone(), ranked))
let resolution = resolve_for_route(
&self.skills_catalog,
pref.skills.as_deref().unwrap_or(&[]),
&selection.skills,
);
log_skill_drops(route_name, &resolution);
let activated_skills: Vec<SkillRef> =
resolution.activated.into_iter().cloned().collect();
Some(RouteDecision {
route_name: Some(route_name.clone()),
models: ranked,
activated_skills,
})
} else {
None
}
} else if !selection.skills.is_empty() {
// Skills-only path: orchestrator picked no route but flagged
// skills. Per the documented contract the request still goes
// through with the originally-requested model and the skill
// bodies are injected. The catalog itself is the effective
// allow-list (it's already the union across every route's
// allow-list, so anything in it was deemed safe to expose).
let activated: Vec<SkillRef> =
resolve_selected_skills(&self.skills_catalog, &selection.skills)
.into_iter()
.cloned()
.collect();
if activated.is_empty() {
None
} else {
Some(RouteDecision {
route_name: None,
models: Vec::new(),
activated_skills: activated,
})
}
} else {
None
}
@ -239,7 +349,7 @@ impl OrchestratorService {
};
info!(
selected_model = ?result,
selected_route = ?result.as_ref().map(|r| (&r.route_name, r.models.first(), r.activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>())),
"plano-orchestrator determined route"
);
@ -253,7 +363,7 @@ impl OrchestratorService {
messages: &[Message],
usage_preferences: Option<Vec<AgentUsagePreference>>,
request_id: Option<String>,
) -> Result<Option<Vec<(String, String)>>> {
) -> Result<Option<OrchestratorSelection>> {
if messages.is_empty() {
return Ok(None);
}
@ -328,6 +438,30 @@ impl OrchestratorService {
}
}
/// Emit `warn!` for any skill names the orchestrator selected but the
/// resolver dropped. Surfacing these is critical for debuggability — a
/// silently-dropped skill is hard to diagnose, and the most common causes
/// (forgetting to add a skill to a route's allow-list, or the orchestrator
/// hallucinating a name) are both fixable once visible.
fn log_skill_drops(route_name: &str, resolution: &common::skills_runtime::SkillResolution<'_>) {
if !resolution.dropped_not_allowed.is_empty() {
warn!(
route = %route_name,
skills = ?resolution.dropped_not_allowed,
"orchestrator selected Agent Skills that are not on this route's allow-list; \
dropping (add them to routing_preferences[].skills if you want this route to use them)"
);
}
if !resolution.dropped_unknown.is_empty() {
warn!(
route = %route_name,
skills = ?resolution.dropped_unknown,
"orchestrator selected Agent Skills that are not in the runtime catalog \
(likely hallucinated or removed)"
);
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -410,6 +544,50 @@ mod tests {
assert!(svc.get_cached_route("s3", None).await.is_some());
}
// ---- RouteDecision construction ----
fn skill_ref(name: &str) -> SkillRef {
SkillRef {
name: name.to_string(),
description: format!("desc for {name}"),
path: None,
base_dir: None,
body: Some(format!("body for {name}")),
scope: Some("project".to_string()),
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}
}
#[test]
fn route_decision_holds_optional_route_name_for_skills_only_path() {
// Regression guard for the docs promise at skills.rst:153-155: a
// skills-only decision must be representable, with no route_name and
// empty models, so the LLM handler falls back to the original model.
let decision = RouteDecision {
route_name: None,
models: Vec::new(),
activated_skills: vec![skill_ref("pdf")],
};
assert!(decision.route_name.is_none());
assert!(decision.models.is_empty());
assert_eq!(decision.activated_skills.len(), 1);
}
#[test]
fn log_skill_drops_does_not_panic_on_empty_resolution() {
// The logger is fire-and-forget. We can't easily assert on the
// emitted warns here without setting up a tracing subscriber, so the
// contract under test is: empty resolutions are silent (no warn
// attempt). Confidence in the warn paths comes from
// common::skills_runtime tests for resolve_for_route, which is the
// function whose dropped_* lists drive this logger.
let empty = common::skills_runtime::SkillResolution::default();
log_skill_drops("any", &empty);
}
#[tokio::test]
async fn test_cache_update_existing_session_does_not_evict() {
let svc = make_orchestrator_service(600, 2);

View file

@ -10,20 +10,37 @@ pub enum OrchestratorModelError {
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
/// The result of running Plano-Orchestrator over a conversation: zero or more
/// selected routes (each mapped to its upstream model name) plus zero or more
/// selected Agent Skills. Skills are filtered down by the consumer to the
/// catalog defined under `routing_preferences[].skills` for the chosen route.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OrchestratorSelection {
pub routes: Vec<(String, String)>,
pub skills: Vec<String>,
}
impl OrchestratorSelection {
pub fn is_empty(&self) -> bool {
self.routes.is_empty() && self.skills.is_empty()
}
}
/// OrchestratorModel trait for handling orchestration requests.
/// Returns multiple routes as the model output format is:
/// {"route": ["route_name_1", "route_name_2", ...]}
/// Returns multiple routes and skills as the model output format is:
/// {"route": ["route_name_1", ...], "skills": ["skill_name_1", ...]}
pub trait OrchestratorModel: Send + Sync {
fn generate_request(
&self,
messages: &[Message],
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> ChatCompletionsRequest;
/// Returns a vector of (route_name, model_name) tuples for all matched routes.
/// Parses the orchestrator's raw model output into selected routes (each
/// mapped to a model) and selected skill names.
fn parse_response(
&self,
content: &str,
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> Result<Option<Vec<(String, String)>>>;
) -> Result<Option<OrchestratorSelection>>;
fn get_model_name(&self) -> String;
}

View file

@ -1,12 +1,12 @@
use std::collections::HashMap;
use common::configuration::{AgentUsagePreference, OrchestrationPreference};
use common::configuration::{AgentUsagePreference, OrchestrationPreference, SkillRef};
use hermesllm::apis::openai::{ChatCompletionsRequest, Message, MessageContent, Role};
use hermesllm::transforms::lib::ExtractText;
use serde::{ser::Serialize as SerializeTrait, Deserialize, Serialize};
use tracing::{debug, warn};
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError, OrchestratorSelection};
pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model
@ -138,10 +138,47 @@ Return your answer strictly in JSON as follows:
If no routes are needed, return an empty list for `route`.
"#;
/// System prompt used when one or more Agent Skills are attached to candidate
/// routes. Adds a `<skills>` block alongside `<routes>` and asks the model to
/// also pick zero or more skills that should be loaded into the downstream
/// LLM's system prompt.
pub const ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS: &str = r#"
You are a helpful assistant that selects the most suitable routes and Agent Skills based on user intent.
You are provided with a list of available routes enclosed within <routes></routes> XML tags:
<routes>
{routes}
</routes>
You are provided with a list of available Agent Skills enclosed within <skills></skills> XML tags:
<skills>
{skills}
</skills>
You are also given the conversation context enclosed within <conversation></conversation> XML tags:
<conversation>
{conversation}
</conversation>
## Instructions
1. Analyze the latest user intent from the conversation.
2. Compare it against the available routes to find which routes can help fulfill the request.
3. Independently compare it against the available skills; pick the skills whose descriptions match what the user is trying to do. Skills can be combined with any route. Activating a skill loads detailed instructions into the next response's system prompt.
4. Respond only with exact names from <routes> and <skills>.
5. If no routes or skills can help, return empty lists.
## Response Format
Return your answer strictly in JSON as follows:
{{"route": ["route_name_1", "..."], "skills": ["skill_name_1", "..."]}}
Use empty lists for `route` and/or `skills` when nothing applies.
"#;
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
pub struct OrchestratorModelV1 {
agent_orchestration_json_str: String,
agent_orchestration_to_model_map: HashMap<String, String>,
/// Pre-rendered `<skills>` block (one JSON entry per skill, name +
/// description). Empty when no skills are attached to any route.
skills_catalog_json_str: String,
orchestration_model: String,
max_token_length: usize,
}
@ -151,10 +188,27 @@ impl OrchestratorModelV1 {
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
orchestration_model: String,
max_token_length: usize,
) -> Self {
Self::with_skills(
agent_orchestrations,
Vec::new(),
orchestration_model,
max_token_length,
)
}
/// Like `new`, but additionally seeds the orchestrator with an Agent
/// Skills catalog. When `skills_catalog` is empty the orchestrator uses
/// the routes-only system prompt; otherwise it asks the model to also
/// pick zero or more skills from the catalog.
pub fn with_skills(
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
skills_catalog: Vec<SkillRef>,
orchestration_model: String,
max_token_length: usize,
) -> Self {
let agent_orchestration_values: Vec<OrchestrationPreference> =
agent_orchestrations.values().flatten().cloned().collect();
// Format routes: each route as JSON on its own line with standard spacing
let agent_orchestration_json_str = agent_orchestration_values
.iter()
.map(to_spaced_json)
@ -165,19 +219,48 @@ impl OrchestratorModelV1 {
.flat_map(|(model, prefs)| prefs.iter().map(|pref| (pref.name.clone(), model.clone())))
.collect();
let skills_catalog_json_str = render_skills_catalog(&skills_catalog);
OrchestratorModelV1 {
orchestration_model,
max_token_length,
agent_orchestration_json_str,
agent_orchestration_to_model_map,
skills_catalog_json_str,
}
}
}
/// JSON shape suitable for the `<skills>` block in the orchestrator prompt:
/// `{"name": "...", "description": "..."}`. Only metadata that helps the
/// orchestrator pick a skill is included; the full SKILL.md body is injected
/// separately, after a skill has been selected.
#[derive(Debug, Clone, Serialize)]
struct SkillCatalogEntry<'a> {
name: &'a str,
description: &'a str,
}
fn render_skills_catalog(skills: &[SkillRef]) -> String {
skills
.iter()
.map(|s| SkillCatalogEntry {
name: &s.name,
description: s.catalog_description(),
})
.map(|entry| to_spaced_json(&entry))
.collect::<Vec<String>>()
.join("\n")
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AgentOrchestratorResponse {
/// The route field now expects an array of route names: ["route_name_1", "route_name_2", ...]
/// The route field expects an array of route names: ["route_name_1", "route_name_2", ...].
pub route: Option<Vec<String>>,
/// Optional array of Agent Skill names the orchestrator chose to activate.
/// Absent or empty when no skills should be loaded for this turn.
#[serde(default)]
pub skills: Option<Vec<String>>,
}
const TOKEN_LENGTH_DIVISOR: usize = 4; // Approximate token length divisor for UTF-8 characters
@ -209,7 +292,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
// Ensure the conversation does not exceed the configured token budget.
// We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
// avoid running a real tokenizer on the hot path.
let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
let template_len = if self.skills_catalog_json_str.is_empty() {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
} else {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS.len()
+ self.skills_catalog_json_str.len()
};
let mut token_count = template_len / TOKEN_LENGTH_DIVISOR;
let mut selected_messages_list_reversed: Vec<Message> = vec![];
for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
let message_text = message.content.extract_text();
@ -289,14 +378,16 @@ impl OrchestratorModel for OrchestratorModelV1 {
// Generate the orchestrator request message based on the usage preferences.
// If preferences are passed in request then we use them;
// Otherwise, we use the default orchestration modelpreferences.
let orchestrator_message =
match convert_to_orchestrator_preferences(usage_preferences_from_request) {
Some(prefs) => generate_orchestrator_message(&prefs, &selected_conversation_list),
None => generate_orchestrator_message(
&self.agent_orchestration_json_str,
&selected_conversation_list,
),
};
let routes_block = match convert_to_orchestrator_preferences(usage_preferences_from_request)
{
Some(prefs) => prefs,
None => self.agent_orchestration_json_str.clone(),
};
let orchestrator_message = generate_orchestrator_message(
&routes_block,
&self.skills_catalog_json_str,
&selected_conversation_list,
);
ChatCompletionsRequest {
model: self.orchestration_model.clone(),
@ -316,7 +407,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
&self,
content: &str,
usage_preferences: &Option<Vec<AgentUsagePreference>>,
) -> Result<Option<Vec<(String, String)>>> {
) -> Result<Option<OrchestratorSelection>> {
if content.is_empty() {
return Ok(None);
}
@ -326,20 +417,21 @@ impl OrchestratorModel for OrchestratorModelV1 {
let selected_routes = orchestrator_response.route.unwrap_or_default();
// Filter out empty routes
let valid_routes: Vec<String> = selected_routes
.into_iter()
.filter(|route| !route.is_empty())
.collect();
if valid_routes.is_empty() {
return Ok(None);
}
let selected_skills: Vec<String> = orchestrator_response
.skills
.unwrap_or_default()
.into_iter()
.filter(|s| !s.is_empty())
.collect();
let mut result: Vec<(String, String)> = Vec::new();
let mut routes: Vec<(String, String)> = Vec::new();
if let Some(usage_preferences) = usage_preferences {
// If usage preferences are defined, we need to find the model that matches each selected route
for selected_route in valid_routes {
let model_name: Option<String> = usage_preferences
.iter()
@ -351,7 +443,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
.map(|pref| pref.model.clone());
if let Some(model_name) = model_name {
result.push((selected_route, model_name));
routes.push((selected_route, model_name));
} else {
warn!(
route = %selected_route,
@ -361,14 +453,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
} else {
// If no usage preferences are passed in request then use the default orchestration model preferences
for selected_route in valid_routes {
if let Some(model) = self
.agent_orchestration_to_model_map
.get(&selected_route)
.cloned()
{
result.push((selected_route, model));
routes.push((selected_route, model));
} else {
warn!(
route = %selected_route,
@ -379,11 +470,14 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
if result.is_empty() {
if routes.is_empty() && selected_skills.is_empty() {
return Ok(None);
}
Ok(Some(result))
Ok(Some(OrchestratorSelection {
routes,
skills: selected_skills,
}))
}
fn get_model_name(&self) -> String {
@ -391,7 +485,11 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
}
fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
fn generate_orchestrator_message(
prefs: &str,
skills_catalog: &str,
selected_conversation_list: &Vec<Message>,
) -> String {
// Format conversation with 4-space indentation (equivalent to Python's json.dumps(obj, indent=4))
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut conversation_buf = Vec::new();
@ -399,9 +497,19 @@ fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<M
SerializeTrait::serialize(&selected_conversation_list, &mut serializer).unwrap();
let conversation_json = String::from_utf8(conversation_buf).unwrap_or_default();
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
let template = if skills_catalog.is_empty() {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
} else {
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS
};
let mut out = template
.replace("{routes}", prefs)
.replace("{conversation}", &conversation_json)
.replace("{conversation}", &conversation_json);
if !skills_catalog.is_empty() {
out = out.replace("{skills}", skills_catalog);
}
out
}
fn convert_to_orchestrator_preferences(
@ -1349,23 +1457,30 @@ If no routes are needed, return an empty list for `route`.
let orchestrator =
OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), 2000);
fn routes(pairs: &[(&str, &str)]) -> OrchestratorSelection {
OrchestratorSelection {
routes: pairs
.iter()
.map(|(r, m)| (r.to_string(), m.to_string()))
.collect(),
skills: Vec::new(),
}
}
// Case 1: Valid JSON with single route in array
let input = r#"{"route": ["Image generation"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
);
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
// Case 2: Valid JSON with multiple routes in array
let input = r#"{"route": ["Image generation", "Code generation"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![
("Image generation".to_string(), "gpt-4o".to_string()),
("Code generation".to_string(), "gpt-4o".to_string())
])
Some(routes(&[
("Image generation", "gpt-4o"),
("Code generation", "gpt-4o")
]))
);
// Case 3: Valid JSON with empty array
@ -1396,14 +1511,65 @@ If no routes are needed, return an empty list for `route`.
// Case 7: Single quotes and \n in JSON
let input = "{'route': ['Image generation']}\\n";
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(
result,
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
);
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
// Case 8: Array with unknown route (not in orchestrations map)
let input = r#"{"route": ["Unknown route"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap();
assert_eq!(result, None);
// Case 9: Routes plus selected skills are propagated through.
let input = r#"{"route": ["Image generation"], "skills": ["pdf-processing"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
assert_eq!(result.routes.len(), 1);
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
// Case 10: Skills-only selection (no routes) still surfaces as Some.
let input = r#"{"route": [], "skills": ["pdf-processing"]}"#;
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
assert!(result.routes.is_empty());
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
}
#[test]
fn test_system_prompt_with_skills_block() {
let orchestrator = OrchestratorModelV1::with_skills(
HashMap::from([(
"gpt-4o".to_string(),
vec![OrchestrationPreference {
name: "Image generation".to_string(),
description: "generating image".to_string(),
}],
)]),
vec![SkillRef {
name: "pdf-processing".to_string(),
description: "Extract structured data from PDFs.".to_string(),
path: None,
base_dir: None,
body: None,
scope: None,
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}],
"test-model".to_string(),
usize::MAX,
);
let conversation: Vec<Message> = serde_json::from_str(
r#"[{"role": "user", "content": "extract the invoice totals from this pdf"}]"#,
)
.unwrap();
let req = orchestrator.generate_request(&conversation, &None);
let prompt = req.messages[0].content.extract_text();
assert!(prompt.contains("<skills>"));
assert!(prompt.contains("</skills>"));
assert!(prompt.contains(r#""name": "pdf-processing""#));
assert!(prompt.contains("Extract structured data from PDFs."));
// Response format documentation must mention the `skills` array
// so the orchestrator emits it.
assert!(prompt.contains(r#""skills""#));
}
}

View file

@ -33,6 +33,7 @@ mod tests {
"openai/gpt-4o".to_string(),
"openai/gpt-4o-mini".to_string(),
],
skills: None,
selection_policy: SelectionPolicy {
prefer: SelectionPreference::None,
},
@ -44,6 +45,7 @@ mod tests {
"anthropic/claude-3-sonnet".to_string(),
"openai/gpt-4o-mini".to_string(),
],
skills: None,
selection_policy: SelectionPolicy {
prefer: SelectionPreference::None,
},

View file

@ -163,6 +163,12 @@ pub struct TopLevelRoutingPreference {
pub name: String,
pub description: String,
pub models: Vec<String>,
/// Agent Skills associated with this route. When Plano-Orchestrator
/// selects this route, every skill listed here is also offered to the
/// orchestrator in the `<skills>` block; selected skills have their
/// SKILL.md body prepended to the upstream system prompt.
#[serde(default)]
pub skills: Option<Vec<String>>,
#[serde(default)]
pub selection_policy: SelectionPolicy,
}
@ -224,6 +230,17 @@ pub struct Configuration {
pub state_storage: Option<StateStorageConfig>,
pub routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
pub model_metrics_sources: Option<Vec<MetricsSource>>,
/// Agent Skills (https://agentskills.io) installed for this project.
///
/// The Plano CLI discovers `.plano/skills/<name>/SKILL.md` files at render
/// time and materializes them into this list with `body` already loaded so
/// downstream consumers do not need filesystem access. Skills are scoped
/// to specific routes via `routing_preferences[].skills`; Plano-Orchestrator
/// receives a `<skills>` block alongside `<routes>` for any skills attached
/// to candidate routes, and selected skills have their SKILL.md body
/// injected into the upstream system prompt.
#[serde(default)]
pub skills: Option<Vec<SkillRef>>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -611,6 +628,45 @@ pub struct PromptTarget {
pub auto_llm_dispatch_on_response: Option<bool>,
}
/// An Agent Skill (https://agentskills.io) as materialized by the Plano CLI.
///
/// At runtime brightstaff and the WASM filters reason over the catalog
/// (`name` + `description`) and, when a skill is selected, inject the
/// pre-loaded `body` into the downstream system prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SkillRef {
pub name: String,
pub description: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_dir: Option<String>,
/// Full SKILL.md markdown body (post-frontmatter). Inlined here at render
/// time so the WASM sandbox does not need filesystem access.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub body: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scope: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub compatibility: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub license: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<HashMap<String, String>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub allowed_tools: Option<String>,
}
impl SkillRef {
/// Best-effort short summary suitable for the `<skills>` block sent to
/// Plano-Orchestrator: only the public-facing description, never the
/// full SKILL.md body. The body is injected separately, after a skill
/// has been selected.
pub fn catalog_description(&self) -> &str {
&self.description
}
}
// convert PromptTarget to ChatCompletionTool
impl From<&PromptTarget> for ChatCompletionTool {
fn from(val: &PromptTarget) -> Self {
@ -807,4 +863,34 @@ disable_signals: false
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
assert_eq!(overrides.disable_signals, None);
}
#[test]
fn test_top_level_routing_preference_skills_deserialize() {
let yaml = r#"
name: code review
description: reviewing, analyzing, and suggesting improvements to existing code
models:
- openai/gpt-4o
skills:
- code-review-skill
"#;
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
assert_eq!(pref.name, "code review");
assert_eq!(
pref.skills.as_deref(),
Some(&["code-review-skill".to_string()][..])
);
}
#[test]
fn test_top_level_routing_preference_skills_optional() {
let yaml = r#"
name: code generation
description: generating new code
models:
- openai/gpt-4o
"#;
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
assert!(pref.skills.is_none());
}
}

View file

@ -8,6 +8,7 @@ pub mod path;
pub mod pii;
pub mod ratelimit;
pub mod routing;
pub mod skills_runtime;
pub mod stats;
pub mod tokenizer;
pub mod traces;

View file

@ -0,0 +1,415 @@
//! Runtime helpers for handling Agent Skills selected by Plano-Orchestrator.
//!
//! These functions live in `common` (rather than `brightstaff` or a WASM
//! crate) so they can be unit-tested on the native target without dragging
//! in proxy-wasm host-call symbols or tokio runtime dependencies.
use std::collections::{HashMap, HashSet};
use crate::configuration::{SkillRef, TopLevelRoutingPreference};
/// Upper bound on the byte length of a single skill body the runtime will
/// inject into an upstream system prompt. SKILL.md files are typically a few
/// kilobytes; this guard keeps a single oversized or malicious skill from
/// blowing the downstream model's context window. Bodies longer than this
/// are tail-trimmed with a marker line. ~32 KiB ≈ 8K tokens at the
/// 4-bytes-per-token heuristic used elsewhere in brightstaff.
pub const MAX_SKILL_BODY_BYTES: usize = 32 * 1024;
const SKILL_BODY_TRUNCATION_MARKER: &str = "\n…[skill body truncated]\n";
/// Outcome of resolving a list of orchestrator-selected skill names against
/// a route's `skills:` allow-list and the runtime catalog. Callers should
/// emit `warn!` for each name in `dropped_not_allowed` / `dropped_unknown`
/// so misconfigured allow-lists and hallucinated picks are observable.
#[derive(Debug, Default)]
pub struct SkillResolution<'a> {
/// Skills that survived both the allow-list and catalog filters, in
/// orchestrator-selected order with duplicates removed.
pub activated: Vec<&'a SkillRef>,
/// Names the orchestrator selected that are NOT in the chosen route's
/// `skills:` allow-list. Typically a cross-route mention — the model
/// pulled a skill name from the global catalog that this route did not
/// expose. Callers should `warn!`.
pub dropped_not_allowed: Vec<String>,
/// Names that ARE allow-listed for the route but are missing from the
/// runtime catalog (skill removed / never installed / hallucinated).
pub dropped_unknown: Vec<String>,
}
/// Build the orchestrator-visible skills catalog from the union of every
/// skill name referenced under `routing_preferences[].skills`. Skills not
/// referenced by any route are excluded — they would just clutter the
/// `<skills>` block with no way for the orchestrator to attach them. The
/// result preserves `catalog` order and is deduplicated by name.
pub fn referenced_skills_catalog(
catalog: &[SkillRef],
routes: &HashMap<String, TopLevelRoutingPreference>,
) -> Vec<SkillRef> {
let mut referenced: HashSet<&str> = HashSet::new();
for route in routes.values() {
if let Some(names) = route.skills.as_ref() {
for name in names {
referenced.insert(name.as_str());
}
}
}
let mut out: Vec<SkillRef> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
for skill in catalog {
if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) {
out.push(skill.clone());
}
}
out
}
/// Filter `selected` skill names to those that are both (a) allow-listed
/// for the chosen route via `route_allowlist` and (b) present in `catalog`,
/// preserving orchestrator order and deduplicating. Drops are reported on
/// the `SkillResolution` struct so callers can `warn!` and surface
/// misconfiguration without re-walking the lists.
pub fn resolve_for_route<'a>(
catalog: &'a [SkillRef],
route_allowlist: &[String],
selected: &[String],
) -> SkillResolution<'a> {
let allowed: HashSet<&str> = route_allowlist.iter().map(String::as_str).collect();
let mut activated: Vec<&SkillRef> = Vec::with_capacity(selected.len());
let mut taken: HashSet<&str> = HashSet::new();
let mut dropped_not_allowed: Vec<String> = Vec::new();
let mut dropped_unknown: Vec<String> = Vec::new();
for name in selected {
if !taken.insert(name.as_str()) {
continue;
}
if !allowed.contains(name.as_str()) {
dropped_not_allowed.push(name.clone());
continue;
}
match catalog.iter().find(|s| &s.name == name) {
Some(skill) => activated.push(skill),
None => dropped_unknown.push(name.clone()),
}
}
SkillResolution {
activated,
dropped_not_allowed,
dropped_unknown,
}
}
/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s
/// directly against the catalog, without any per-route allow-list. Use this
/// for the "skills-only" path documented in `docs/source/resources/skills.rst`
/// where the orchestrator returns skills but no route — the catalog itself
/// (already pre-filtered to skills referenced by SOME route via
/// `referenced_skills_catalog`) is the effective allow-list. Unknown names
/// are dropped silently; results are deduplicated by name preserving order.
pub fn resolve_selected_skills<'a>(
skills: &'a [SkillRef],
selected_names: &[String],
) -> Vec<&'a SkillRef> {
let mut out: Vec<&SkillRef> = Vec::with_capacity(selected_names.len());
let mut seen: HashSet<&str> = HashSet::new();
for name in selected_names {
if !seen.insert(name.as_str()) {
continue;
}
if let Some(skill) = skills.iter().find(|s| &s.name == name) {
out.push(skill);
}
}
out
}
/// Append the bodies of activated skills to a system prompt, wrapped in
/// `<skill_content name="..." [base_dir="..."]>…</skill_content>` tags so a
/// future context-management pass can recognize and recompact them.
///
/// Behavior contract (relied on by `brightstaff::handlers::llm::model_selection`):
///
/// * Returns `None` only when no base prompt was supplied **and** no skills
/// were activated. Otherwise always returns `Some`.
/// * The base prompt (if any) is kept verbatim and the skill block is
/// appended after a blank line.
/// * Each skill body is tail-trimmed at `MAX_SKILL_BODY_BYTES` bytes (UTF-8
/// boundary safe) with a truncation marker, so a single oversized
/// SKILL.md cannot blow the downstream context window.
/// * `name` and `base_dir` are XML-attribute-escaped (`&`, `<`, `>`, `"`)
/// so a maliciously named skill cannot break out of the wrapper. Skill
/// names are already validated upstream, but defense-in-depth matters
/// here because the wrapper is part of the LLM's system prompt.
pub fn augment_system_prompt_with_skills(
base_system_prompt: Option<String>,
activated_skills: &[&SkillRef],
) -> Option<String> {
if activated_skills.is_empty() {
return base_system_prompt;
}
let mut buf = String::new();
if let Some(base) = base_system_prompt {
if !base.is_empty() {
buf.push_str(&base);
buf.push('\n');
buf.push('\n');
}
}
buf.push_str(
"The following Agent Skills have been activated for this request. \
Follow their instructions when relevant; resolve relative paths \
against each skill's base directory.\n\n",
);
for skill in activated_skills {
buf.push_str(&format!(
"<skill_content name=\"{}\"",
xml_attr_escape(&skill.name)
));
if let Some(base_dir) = skill.base_dir.as_deref() {
buf.push_str(&format!(" base_dir=\"{}\"", xml_attr_escape(base_dir)));
}
buf.push_str(">\n");
if let Some(body) = skill.body.as_deref() {
buf.push_str(&truncate_skill_body(body));
buf.push('\n');
} else {
buf.push_str(&format!(
"(skill description) {}\n",
xml_attr_escape(&skill.description)
));
}
buf.push_str("</skill_content>\n\n");
}
Some(buf.trim_end().to_string())
}
/// Escape a string for use inside an XML attribute value (double-quoted).
/// Quotes `&`, `<`, `>`, and `"`; leaves single quotes alone since the
/// wrapper always uses double quotes.
fn xml_attr_escape(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for ch in s.chars() {
match ch {
'&' => out.push_str("&amp;"),
'<' => out.push_str("&lt;"),
'>' => out.push_str("&gt;"),
'"' => out.push_str("&quot;"),
_ => out.push(ch),
}
}
out
}
/// Tail-trim `body` to at most `MAX_SKILL_BODY_BYTES` bytes, respecting
/// UTF-8 character boundaries. Appends a marker so the downstream model
/// can tell content was dropped. Pass-through for short bodies.
fn truncate_skill_body(body: &str) -> String {
let trimmed = body.trim_end();
if trimmed.len() <= MAX_SKILL_BODY_BYTES {
return trimmed.to_string();
}
// Reserve room for the marker so the final length is still within the
// budget even when the marker is added.
let budget = MAX_SKILL_BODY_BYTES.saturating_sub(SKILL_BODY_TRUNCATION_MARKER.len());
let mut end = budget;
while end > 0 && !trimmed.is_char_boundary(end) {
end -= 1;
}
let mut out = String::with_capacity(end + SKILL_BODY_TRUNCATION_MARKER.len());
out.push_str(&trimmed[..end]);
out.push_str(SKILL_BODY_TRUNCATION_MARKER);
out
}
#[cfg(test)]
mod tests {
use super::*;
use crate::configuration::SelectionPolicy;
fn skill(name: &str, body: &str) -> SkillRef {
SkillRef {
name: name.to_string(),
description: format!("desc for {}", name),
path: Some(format!("/skills/{}/SKILL.md", name)),
base_dir: Some(format!("/skills/{}", name)),
body: Some(body.to_string()),
scope: Some("project".to_string()),
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}
}
fn route(name: &str, skill_names: Option<Vec<&str>>) -> TopLevelRoutingPreference {
TopLevelRoutingPreference {
name: name.to_string(),
description: format!("desc for {}", name),
models: vec!["openai/gpt-4o".to_string()],
skills: skill_names.map(|v| v.into_iter().map(String::from).collect()),
selection_policy: SelectionPolicy::default(),
}
}
fn routes_map(
routes: Vec<TopLevelRoutingPreference>,
) -> HashMap<String, TopLevelRoutingPreference> {
routes.into_iter().map(|r| (r.name.clone(), r)).collect()
}
// --- referenced_skills_catalog ---
#[test]
fn referenced_catalog_is_union_across_routes() {
let catalog = vec![
skill("pdf", "extract"),
skill("code-review", "review"),
skill("never-used", "x"),
];
let routes = routes_map(vec![
route("docs", Some(vec!["pdf"])),
route("review", Some(vec!["code-review"])),
route("other", None),
]);
let out = referenced_skills_catalog(&catalog, &routes);
let names: Vec<_> = out.iter().map(|s| s.name.as_str()).collect();
assert!(names.contains(&"pdf"));
assert!(names.contains(&"code-review"));
assert!(!names.contains(&"never-used"));
}
#[test]
fn referenced_catalog_deduplicates_when_multiple_routes_share_a_skill() {
let catalog = vec![skill("pdf", "extract")];
let routes = routes_map(vec![
route("a", Some(vec!["pdf"])),
route("b", Some(vec!["pdf"])),
]);
let out = referenced_skills_catalog(&catalog, &routes);
assert_eq!(out.len(), 1);
}
// --- resolve_for_route ---
#[test]
fn resolve_for_route_keeps_allowlisted_skills_in_orchestrator_order() {
let catalog = vec![skill("a", ""), skill("b", ""), skill("c", "")];
let allow = vec!["a".to_string(), "b".to_string(), "c".to_string()];
let selected = vec!["c".to_string(), "a".to_string()];
let r = resolve_for_route(&catalog, &allow, &selected);
let names: Vec<_> = r.activated.iter().map(|s| s.name.as_str()).collect();
assert_eq!(names, vec!["c", "a"]);
assert!(r.dropped_not_allowed.is_empty());
assert!(r.dropped_unknown.is_empty());
}
#[test]
fn resolve_for_route_drops_cross_route_skill_into_not_allowed() {
let catalog = vec![skill("pdf", ""), skill("payment", "")];
let allow = vec!["pdf".to_string()]; // route only allows pdf
let selected = vec!["pdf".to_string(), "payment".to_string()];
let r = resolve_for_route(&catalog, &allow, &selected);
assert_eq!(r.activated.len(), 1);
assert_eq!(r.activated[0].name, "pdf");
assert_eq!(r.dropped_not_allowed, vec!["payment".to_string()]);
assert!(r.dropped_unknown.is_empty());
}
#[test]
fn resolve_for_route_drops_hallucinated_skill_into_unknown() {
let catalog = vec![skill("pdf", "")];
let allow = vec!["pdf".to_string(), "imaginary".to_string()];
let selected = vec!["pdf".to_string(), "imaginary".to_string()];
let r = resolve_for_route(&catalog, &allow, &selected);
assert_eq!(r.activated.len(), 1);
assert_eq!(r.activated[0].name, "pdf");
assert!(r.dropped_not_allowed.is_empty());
assert_eq!(r.dropped_unknown, vec!["imaginary".to_string()]);
}
#[test]
fn resolve_for_route_deduplicates_repeats() {
let catalog = vec![skill("pdf", "")];
let allow = vec!["pdf".to_string()];
let selected = vec!["pdf".to_string(), "pdf".to_string(), "pdf".to_string()];
let r = resolve_for_route(&catalog, &allow, &selected);
assert_eq!(r.activated.len(), 1);
}
// --- resolve_selected_skills (skills-only path) ---
#[test]
fn resolve_selected_skills_drops_unknown_and_dedupes() {
let catalog = vec![
skill("pdf-processing", "extract"),
skill("code-review", "review"),
];
let names = vec![
"code-review".to_string(),
"ghost".to_string(),
"code-review".to_string(),
"pdf-processing".to_string(),
];
let resolved = resolve_selected_skills(&catalog, &names);
assert_eq!(resolved.len(), 2);
assert_eq!(resolved[0].name, "code-review");
assert_eq!(resolved[1].name, "pdf-processing");
}
// --- augment_system_prompt_with_skills ---
#[test]
fn augment_passthrough_with_no_skills() {
let augmented = augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[]);
assert_eq!(augmented.as_deref(), Some("you are helpful"));
}
#[test]
fn augment_includes_skill_bodies() {
let s = skill("pdf-processing", "extract text and tables");
let augmented =
augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[&s])
.expect("augmented");
assert!(augmented.starts_with("you are helpful"));
assert!(augmented.contains("<skill_content name=\"pdf-processing\""));
assert!(augmented.contains("extract text and tables"));
assert!(augmented.contains("base_dir=\"/skills/pdf-processing\""));
}
#[test]
fn augment_without_base_prompt_still_works() {
let s = skill("code-review", "look at diffs");
let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
assert!(augmented.contains("<skill_content name=\"code-review\""));
assert!(augmented.contains("look at diffs"));
}
#[test]
fn augment_xml_escapes_skill_name_and_base_dir() {
let mut s = skill("safe-name", "body");
s.name = "bad\"name".to_string();
s.base_dir = Some("/path/with\"quote".to_string());
let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
// Raw double-quote must NOT appear inside the attribute value — only
// its escaped form. Otherwise it would close the attribute and let a
// skill name inject arbitrary attributes / break out of the wrapper.
assert!(augmented.contains("name=\"bad&quot;name\""));
assert!(augmented.contains("base_dir=\"/path/with&quot;quote\""));
}
#[test]
fn augment_truncates_oversized_skill_body() {
let big_body: String = "a".repeat(MAX_SKILL_BODY_BYTES * 2);
let s = skill("huge", &big_body);
let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
// Truncation marker is present, so the body did NOT pass through verbatim.
assert!(augmented.contains("[skill body truncated]"));
// And the body slice cannot be longer than MAX_SKILL_BODY_BYTES + a
// little wrapper overhead — definitely not 2× the cap.
let body_section_end = augmented.find("</skill_content>").unwrap();
let body_section_start = augmented.find(">\n").unwrap() + 2;
let body_len = body_section_end - body_section_start;
assert!(body_len <= MAX_SKILL_BODY_BYTES + 64);
}
}

View file

@ -209,19 +209,15 @@ impl StreamContext {
} else {
info!("no default prompt target found, forwarding request to upstream llm");
let mut messages = Vec::new();
// add system prompt
match self.system_prompt.as_ref() {
None => {}
Some(system_prompt) => {
let system_prompt_message = Message {
role: SYSTEM_ROLE.to_string(),
content: Some(ContentType::Text(system_prompt.clone())),
model: None,
tool_calls: None,
tool_call_id: None,
};
messages.push(system_prompt_message);
}
if let Some(system_prompt) = self.system_prompt.as_ref().clone() {
let system_prompt_message = Message {
role: SYSTEM_ROLE.to_string(),
content: Some(ContentType::Text(system_prompt)),
model: None,
tool_calls: None,
tool_call_id: None,
};
messages.push(system_prompt_message);
}
messages.append(