mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
feat(skills): add Agent Skills support with orchestrator-driven activation
This commit is contained in:
parent
5a4487fc6e
commit
7f5bf641bb
24 changed files with 2777 additions and 97 deletions
|
|
@ -132,11 +132,15 @@ impl AgentSelector {
|
|||
.determine_orchestration(messages, Some(usage_preferences), request_id)
|
||||
.await
|
||||
{
|
||||
Ok(Some(routes)) => {
|
||||
debug!(count = routes.len(), "determined agents via orchestration");
|
||||
Ok(Some(selection)) => {
|
||||
debug!(
|
||||
count = selection.routes.len(),
|
||||
skill_count = selection.skills.len(),
|
||||
"determined agents via orchestration"
|
||||
);
|
||||
let mut selected_agents = Vec::new();
|
||||
|
||||
for (route_name, agent_name) in routes {
|
||||
for (route_name, agent_name) in selection.routes {
|
||||
debug!(route = %route_name, agent = %agent_name, "processing route");
|
||||
let selected_agent = agents
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ use crate::tracing::{
|
|||
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
||||
plano as tracing_plano, set_service_name,
|
||||
};
|
||||
use model_selection::router_chat_get_upstream_model;
|
||||
use model_selection::{inject_activated_skills_into_request, router_chat_get_upstream_model};
|
||||
|
||||
const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";
|
||||
|
||||
|
|
@ -282,26 +282,16 @@ async fn llm_chat_inner(
|
|||
Err(response) => return Ok(response),
|
||||
};
|
||||
|
||||
// Serialize request for upstream BEFORE router consumes it
|
||||
let client_request_bytes_for_upstream: Bytes =
|
||||
match ProviderRequestType::to_bytes(&client_request) {
|
||||
Ok(bytes) => bytes.into(),
|
||||
Err(err) => {
|
||||
warn!(error = %err, "failed to serialize request for upstream");
|
||||
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
|
||||
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
return Ok(r);
|
||||
}
|
||||
};
|
||||
|
||||
// --- Phase 3: Route the request (or use pinned model from session cache) ---
|
||||
let resolved_model = if let Some(cached_model) = pinned_model {
|
||||
// Route the request (or use pinned model from session cache) BEFORE
|
||||
// serializing for upstream — skill body injection happens here, so the
|
||||
// upstream bytes must be produced after routing returns.
|
||||
let (resolved_model, activated_skills) = if let Some(cached_model) = pinned_model {
|
||||
info!(
|
||||
session_id = %session_id.as_deref().unwrap_or(""),
|
||||
model = %cached_model,
|
||||
"using pinned routing decision from cache"
|
||||
);
|
||||
cached_model
|
||||
(cached_model, Vec::new())
|
||||
} else {
|
||||
let routing_span = info_span!(
|
||||
"routing",
|
||||
|
|
@ -313,11 +303,16 @@ async fn llm_chat_inner(
|
|||
route.selected_model = tracing::field::Empty,
|
||||
routing.determination_ms = tracing::field::Empty,
|
||||
);
|
||||
// The router consumes the request (it converts it to OpenAI format
|
||||
// internally to extract conversation messages). Clone so we can
|
||||
// still mutate the original below when Plano-Orchestrator activates
|
||||
// any Agent Skills.
|
||||
let request_for_routing = client_request.clone();
|
||||
let routing_result = match async {
|
||||
set_service_name(operation_component::ROUTING);
|
||||
router_chat_get_upstream_model(
|
||||
Arc::clone(&state.orchestrator_service),
|
||||
client_request,
|
||||
request_for_routing,
|
||||
&request_path,
|
||||
&request_id,
|
||||
inline_routing_preferences,
|
||||
|
|
@ -335,8 +330,11 @@ async fn llm_chat_inner(
|
|||
}
|
||||
};
|
||||
|
||||
let (router_selected_model, route_name) =
|
||||
(routing_result.model_name, routing_result.route_name);
|
||||
let (router_selected_model, route_name, activated) = (
|
||||
routing_result.model_name,
|
||||
routing_result.route_name,
|
||||
routing_result.activated_skills,
|
||||
);
|
||||
let model = if router_selected_model != "none" {
|
||||
router_selected_model
|
||||
} else {
|
||||
|
|
@ -362,10 +360,34 @@ async fn llm_chat_inner(
|
|||
.await;
|
||||
}
|
||||
|
||||
model
|
||||
(model, activated)
|
||||
};
|
||||
tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str());
|
||||
|
||||
// If Plano-Orchestrator activated any Agent Skills for this route, inject
|
||||
// their SKILL.md bodies into the system prompt before we hand the bytes
|
||||
// off to the upstream provider.
|
||||
if !activated_skills.is_empty() {
|
||||
info!(
|
||||
count = activated_skills.len(),
|
||||
skills = ?activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>(),
|
||||
"injecting activated Agent Skills into upstream system prompt"
|
||||
);
|
||||
inject_activated_skills_into_request(&mut client_request, &activated_skills);
|
||||
}
|
||||
|
||||
// Serialize request for upstream AFTER potential skill injection.
|
||||
let client_request_bytes_for_upstream: Bytes =
|
||||
match ProviderRequestType::to_bytes(&client_request) {
|
||||
Ok(bytes) => bytes.into(),
|
||||
Err(err) => {
|
||||
warn!(error = %err, "failed to serialize request for upstream");
|
||||
let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
|
||||
*r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
return Ok(r);
|
||||
}
|
||||
};
|
||||
|
||||
// --- Phase 4: Forward to upstream and stream back ---
|
||||
send_upstream(
|
||||
&state.http_client,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
use common::configuration::TopLevelRoutingPreference;
|
||||
use common::configuration::{SkillRef, TopLevelRoutingPreference};
|
||||
use common::skills_runtime::augment_system_prompt_with_skills;
|
||||
use hermesllm::apis::openai::{Message, MessageContent, Role};
|
||||
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
||||
use hermesllm::providers::request::ProviderRequest;
|
||||
use hermesllm::transforms::lib::ExtractText;
|
||||
use hermesllm::ProviderRequestType;
|
||||
use hyper::StatusCode;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -29,6 +33,10 @@ pub struct RoutingResult {
|
|||
/// Full ranked list — use subsequent entries as fallbacks on 429/5xx.
|
||||
pub models: Vec<String>,
|
||||
pub route_name: Option<String>,
|
||||
/// Agent Skills activated by Plano-Orchestrator for this request.
|
||||
/// Their `body` field (the SKILL.md content) is prepended to the
|
||||
/// upstream system prompt by the caller in `send_upstream`.
|
||||
pub activated_skills: Vec<SkillRef>,
|
||||
}
|
||||
|
||||
pub struct RoutingError {
|
||||
|
|
@ -128,8 +136,8 @@ pub async fn router_chat_get_upstream_model(
|
|||
|
||||
match routing_result {
|
||||
Ok(route) => match route {
|
||||
Some((route_name, ranked_models)) => {
|
||||
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
||||
Some(decision) => {
|
||||
let model_name = decision.models.first().cloned().unwrap_or_default();
|
||||
current_span.record("route.selected_model", model_name.as_str());
|
||||
bs_metrics::record_router_decision(
|
||||
route_label,
|
||||
|
|
@ -139,8 +147,9 @@ pub async fn router_chat_get_upstream_model(
|
|||
);
|
||||
Ok(RoutingResult {
|
||||
model_name,
|
||||
models: ranked_models,
|
||||
route_name: Some(route_name),
|
||||
models: decision.models,
|
||||
route_name: Some(decision.route_name),
|
||||
activated_skills: decision.activated_skills,
|
||||
})
|
||||
}
|
||||
None => {
|
||||
|
|
@ -159,6 +168,7 @@ pub async fn router_chat_get_upstream_model(
|
|||
model_name: "none".to_string(),
|
||||
models: vec!["none".to_string()],
|
||||
route_name: None,
|
||||
activated_skills: Vec::new(),
|
||||
})
|
||||
}
|
||||
},
|
||||
|
|
@ -172,3 +182,61 @@ pub async fn router_chat_get_upstream_model(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepend the bodies of `activated_skills` to the system prompt of the
|
||||
/// upstream request so the chosen LLM has access to each skill's instructions.
|
||||
/// Works across every provider variant by going through the OpenAI message
|
||||
/// shape (`get_messages`/`set_messages`).
|
||||
///
|
||||
/// When there is already a leading system message we augment it in place;
|
||||
/// otherwise a new system message is inserted at position 0. No-op when
|
||||
/// `activated_skills` is empty.
|
||||
pub fn inject_activated_skills_into_request(
|
||||
client_request: &mut ProviderRequestType,
|
||||
activated_skills: &[SkillRef],
|
||||
) {
|
||||
if activated_skills.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let skill_refs: Vec<&SkillRef> = activated_skills.iter().collect();
|
||||
|
||||
let mut messages = client_request.get_messages();
|
||||
|
||||
let (system_idx, base_text) = match messages.iter().position(|m| m.role == Role::System) {
|
||||
Some(idx) => {
|
||||
let text = messages[idx]
|
||||
.content
|
||||
.as_ref()
|
||||
.map(|c| c.extract_text())
|
||||
.unwrap_or_default();
|
||||
(Some(idx), Some(text))
|
||||
}
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
let augmented = augment_system_prompt_with_skills(base_text, &skill_refs);
|
||||
let Some(augmented_text) = augmented else {
|
||||
return;
|
||||
};
|
||||
|
||||
match system_idx {
|
||||
Some(idx) => {
|
||||
messages[idx].content = Some(MessageContent::Text(augmented_text));
|
||||
}
|
||||
None => {
|
||||
messages.insert(
|
||||
0,
|
||||
Message {
|
||||
role: Role::System,
|
||||
content: Some(MessageContent::Text(augmented_text)),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
client_request.set_messages(&messages);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -314,11 +314,12 @@ async fn init_app_state(
|
|||
.orchestrator_model_context_length
|
||||
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
|
||||
|
||||
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||
let orchestrator_service = Arc::new(OrchestratorService::with_routing_and_skills(
|
||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||
orchestrator_model_name,
|
||||
orchestrator_llm_provider,
|
||||
config.routing_preferences.clone(),
|
||||
config.skills.clone(),
|
||||
metrics_service,
|
||||
session_ttl_seconds,
|
||||
session_cache,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use std::{borrow::Cow, collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use common::{
|
||||
configuration::{AgentUsagePreference, OrchestrationPreference, TopLevelRoutingPreference},
|
||||
configuration::{
|
||||
AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference,
|
||||
},
|
||||
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
|
||||
};
|
||||
use hermesllm::apis::openai::Message;
|
||||
|
|
@ -13,7 +15,7 @@ use tracing::{debug, info};
|
|||
|
||||
use super::http::{self, post_and_extract_content};
|
||||
use super::model_metrics::ModelMetricsService;
|
||||
use super::orchestrator_model::OrchestratorModel;
|
||||
use super::orchestrator_model::{OrchestratorModel, OrchestratorSelection};
|
||||
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
|
|
@ -30,12 +32,27 @@ pub struct OrchestratorService {
|
|||
orchestrator_model: Arc<dyn OrchestratorModel>,
|
||||
orchestrator_provider_name: String,
|
||||
top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
|
||||
/// Agent Skills catalog (deduplicated by name) attached to any
|
||||
/// `routing_preferences[].skills` list. Empty when no route has skills.
|
||||
skills_catalog: Vec<SkillRef>,
|
||||
metrics_service: Option<Arc<ModelMetricsService>>,
|
||||
session_cache: Option<Arc<dyn SessionCache>>,
|
||||
session_ttl: Duration,
|
||||
tenant_header: Option<String>,
|
||||
}
|
||||
|
||||
/// Result of `determine_route`: which route was picked, the ranked candidate
|
||||
/// models for that route, and the Agent Skill bodies the orchestrator chose
|
||||
/// to activate alongside it. Skills are resolved against
|
||||
/// `routing_preferences[<route>].skills`, so unknown / cross-route names are
|
||||
/// silently dropped.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct RouteDecision {
|
||||
pub route_name: String,
|
||||
pub models: Vec<String>,
|
||||
pub activated_skills: Vec<SkillRef>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum OrchestrationError {
|
||||
#[error(transparent)]
|
||||
|
|
@ -66,6 +83,7 @@ impl OrchestratorService {
|
|||
orchestrator_model,
|
||||
orchestrator_provider_name,
|
||||
top_level_preferences: HashMap::new(),
|
||||
skills_catalog: Vec::new(),
|
||||
metrics_service: None,
|
||||
session_cache: None,
|
||||
session_ttl: Duration::from_secs(DEFAULT_SESSION_TTL_SECONDS),
|
||||
|
|
@ -84,14 +102,53 @@ impl OrchestratorService {
|
|||
session_cache: Arc<dyn SessionCache>,
|
||||
tenant_header: Option<String>,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
Self::with_routing_and_skills(
|
||||
orchestrator_url,
|
||||
orchestration_model_name,
|
||||
orchestrator_provider_name,
|
||||
top_level_prefs,
|
||||
None,
|
||||
metrics_service,
|
||||
session_ttl_seconds,
|
||||
session_cache,
|
||||
tenant_header,
|
||||
max_token_length,
|
||||
)
|
||||
}
|
||||
|
||||
/// Like `with_routing`, but also seeds the orchestrator with a catalog of
|
||||
/// Agent Skills referenced by `routing_preferences[].skills`. The
|
||||
/// orchestrator gets a `<skills>` block in its system prompt and may
|
||||
/// select zero or more skills alongside the picked route; this enables
|
||||
/// the LLM handler to inject the chosen SKILL.md bodies into the
|
||||
/// upstream request.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn with_routing_and_skills(
|
||||
orchestrator_url: String,
|
||||
orchestration_model_name: String,
|
||||
orchestrator_provider_name: String,
|
||||
top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
|
||||
skills_catalog: Option<Vec<SkillRef>>,
|
||||
metrics_service: Option<Arc<ModelMetricsService>>,
|
||||
session_ttl_seconds: Option<u64>,
|
||||
session_cache: Arc<dyn SessionCache>,
|
||||
tenant_header: Option<String>,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
|
||||
.map_or_else(HashMap::new, |prefs| {
|
||||
prefs.into_iter().map(|p| (p.name.clone(), p)).collect()
|
||||
});
|
||||
|
||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||
let skills_catalog = build_skills_catalog_for_routes(
|
||||
skills_catalog.as_deref().unwrap_or(&[]),
|
||||
&top_level_preferences,
|
||||
);
|
||||
|
||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::with_skills(
|
||||
HashMap::new(),
|
||||
skills_catalog.clone(),
|
||||
orchestration_model_name,
|
||||
max_token_length,
|
||||
));
|
||||
|
|
@ -105,6 +162,7 @@ impl OrchestratorService {
|
|||
orchestrator_model,
|
||||
orchestrator_provider_name,
|
||||
top_level_preferences,
|
||||
skills_catalog,
|
||||
metrics_service,
|
||||
session_cache: Some(session_cache),
|
||||
session_ttl,
|
||||
|
|
@ -170,7 +228,7 @@ impl OrchestratorService {
|
|||
messages: &[Message],
|
||||
inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
|
||||
request_id: &str,
|
||||
) -> Result<Option<(String, Vec<String>)>> {
|
||||
) -> Result<Option<RouteDecision>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
@ -206,9 +264,13 @@ impl OrchestratorService {
|
|||
)
|
||||
.await?;
|
||||
|
||||
let result = if let Some(ref routes) = orchestration_result {
|
||||
if routes.len() > 1 {
|
||||
let all_routes: Vec<&str> = routes.iter().map(|(name, _)| name.as_str()).collect();
|
||||
let result = if let Some(ref selection) = orchestration_result {
|
||||
if selection.routes.len() > 1 {
|
||||
let all_routes: Vec<&str> = selection
|
||||
.routes
|
||||
.iter()
|
||||
.map(|(name, _)| name.as_str())
|
||||
.collect();
|
||||
info!(
|
||||
routes = ?all_routes,
|
||||
using = %all_routes.first().unwrap_or(&"none"),
|
||||
|
|
@ -216,7 +278,7 @@ impl OrchestratorService {
|
|||
);
|
||||
}
|
||||
|
||||
if let Some((route_name, _)) = routes.first() {
|
||||
if let Some((route_name, _)) = selection.routes.first() {
|
||||
let top_pref = inline_top_map
|
||||
.as_ref()
|
||||
.and_then(|m| m.get(route_name))
|
||||
|
|
@ -227,7 +289,16 @@ impl OrchestratorService {
|
|||
Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await,
|
||||
None => pref.models.clone(),
|
||||
};
|
||||
Some((route_name.clone(), ranked))
|
||||
let activated_skills = resolve_activated_skills(
|
||||
&self.skills_catalog,
|
||||
pref.skills.as_deref().unwrap_or(&[]),
|
||||
&selection.skills,
|
||||
);
|
||||
Some(RouteDecision {
|
||||
route_name: route_name.clone(),
|
||||
models: ranked,
|
||||
activated_skills,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -239,7 +310,7 @@ impl OrchestratorService {
|
|||
};
|
||||
|
||||
info!(
|
||||
selected_model = ?result,
|
||||
selected_route = ?result.as_ref().map(|r| (&r.route_name, r.models.first(), r.activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>())),
|
||||
"plano-orchestrator determined route"
|
||||
);
|
||||
|
||||
|
|
@ -253,7 +324,7 @@ impl OrchestratorService {
|
|||
messages: &[Message],
|
||||
usage_preferences: Option<Vec<AgentUsagePreference>>,
|
||||
request_id: Option<String>,
|
||||
) -> Result<Option<Vec<(String, String)>>> {
|
||||
) -> Result<Option<OrchestratorSelection>> {
|
||||
if messages.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
@ -328,6 +399,61 @@ impl OrchestratorService {
|
|||
}
|
||||
}
|
||||
|
||||
/// Build the orchestrator-visible skills catalog (deduplicated by name) from
|
||||
/// the union of every skill name referenced under
|
||||
/// `routing_preferences[].skills`. Skills that are not referenced by any
|
||||
/// route are excluded — they would just clutter the prompt with no way for
|
||||
/// the orchestrator to attach them to a route.
|
||||
fn build_skills_catalog_for_routes(
|
||||
catalog: &[SkillRef],
|
||||
routes: &HashMap<String, TopLevelRoutingPreference>,
|
||||
) -> Vec<SkillRef> {
|
||||
let mut referenced: std::collections::HashSet<&str> = std::collections::HashSet::new();
|
||||
for route in routes.values() {
|
||||
if let Some(names) = route.skills.as_ref() {
|
||||
for name in names {
|
||||
referenced.insert(name.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut out: Vec<SkillRef> = Vec::new();
|
||||
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
for skill in catalog {
|
||||
if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) {
|
||||
out.push(skill.clone());
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Filter the orchestrator-selected skill names down to the SKILL.md bodies
|
||||
/// allowed for the chosen route, preserving the order the orchestrator
|
||||
/// returned. Unknown names (either not in the catalog or not allowed by the
|
||||
/// route) are silently dropped; the orchestrator can hallucinate.
|
||||
fn resolve_activated_skills(
|
||||
catalog: &[SkillRef],
|
||||
route_allowlist: &[String],
|
||||
selected: &[String],
|
||||
) -> Vec<SkillRef> {
|
||||
let allowed: std::collections::HashSet<&str> =
|
||||
route_allowlist.iter().map(String::as_str).collect();
|
||||
let mut out: Vec<SkillRef> = Vec::with_capacity(selected.len());
|
||||
let mut taken: std::collections::HashSet<&str> = std::collections::HashSet::new();
|
||||
for name in selected {
|
||||
if !allowed.contains(name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
if !taken.insert(name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
if let Some(skill) = catalog.iter().find(|s| &s.name == name) {
|
||||
out.push(skill.clone());
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -10,20 +10,37 @@ pub enum OrchestratorModelError {
|
|||
|
||||
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
|
||||
|
||||
/// The result of running Plano-Orchestrator over a conversation: zero or more
|
||||
/// selected routes (each mapped to its upstream model name) plus zero or more
|
||||
/// selected Agent Skills. Skills are filtered down by the consumer to the
|
||||
/// catalog defined under `routing_preferences[].skills` for the chosen route.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub struct OrchestratorSelection {
|
||||
pub routes: Vec<(String, String)>,
|
||||
pub skills: Vec<String>,
|
||||
}
|
||||
|
||||
impl OrchestratorSelection {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.routes.is_empty() && self.skills.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// OrchestratorModel trait for handling orchestration requests.
|
||||
/// Returns multiple routes as the model output format is:
|
||||
/// {"route": ["route_name_1", "route_name_2", ...]}
|
||||
/// Returns multiple routes and skills as the model output format is:
|
||||
/// {"route": ["route_name_1", ...], "skills": ["skill_name_1", ...]}
|
||||
pub trait OrchestratorModel: Send + Sync {
|
||||
fn generate_request(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
usage_preferences: &Option<Vec<AgentUsagePreference>>,
|
||||
) -> ChatCompletionsRequest;
|
||||
/// Returns a vector of (route_name, model_name) tuples for all matched routes.
|
||||
/// Parses the orchestrator's raw model output into selected routes (each
|
||||
/// mapped to a model) and selected skill names.
|
||||
fn parse_response(
|
||||
&self,
|
||||
content: &str,
|
||||
usage_preferences: &Option<Vec<AgentUsagePreference>>,
|
||||
) -> Result<Option<Vec<(String, String)>>>;
|
||||
) -> Result<Option<OrchestratorSelection>>;
|
||||
fn get_model_name(&self) -> String;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use common::configuration::{AgentUsagePreference, OrchestrationPreference};
|
||||
use common::configuration::{AgentUsagePreference, OrchestrationPreference, SkillRef};
|
||||
use hermesllm::apis::openai::{ChatCompletionsRequest, Message, MessageContent, Role};
|
||||
use hermesllm::transforms::lib::ExtractText;
|
||||
use serde::{ser::Serialize as SerializeTrait, Deserialize, Serialize};
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
|
||||
use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError, OrchestratorSelection};
|
||||
|
||||
pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model
|
||||
|
||||
|
|
@ -138,10 +138,47 @@ Return your answer strictly in JSON as follows:
|
|||
If no routes are needed, return an empty list for `route`.
|
||||
"#;
|
||||
|
||||
/// System prompt used when one or more Agent Skills are attached to candidate
|
||||
/// routes. Adds a `<skills>` block alongside `<routes>` and asks the model to
|
||||
/// also pick zero or more skills that should be loaded into the downstream
|
||||
/// LLM's system prompt.
|
||||
pub const ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS: &str = r#"
|
||||
You are a helpful assistant that selects the most suitable routes and Agent Skills based on user intent.
|
||||
You are provided with a list of available routes enclosed within <routes></routes> XML tags:
|
||||
<routes>
|
||||
{routes}
|
||||
</routes>
|
||||
|
||||
You are provided with a list of available Agent Skills enclosed within <skills></skills> XML tags:
|
||||
<skills>
|
||||
{skills}
|
||||
</skills>
|
||||
|
||||
You are also given the conversation context enclosed within <conversation></conversation> XML tags:
|
||||
<conversation>
|
||||
{conversation}
|
||||
</conversation>
|
||||
|
||||
## Instructions
|
||||
1. Analyze the latest user intent from the conversation.
|
||||
2. Compare it against the available routes to find which routes can help fulfill the request.
|
||||
3. Independently compare it against the available skills; pick the skills whose descriptions match what the user is trying to do. Skills can be combined with any route. Activating a skill loads detailed instructions into the next response's system prompt.
|
||||
4. Respond only with exact names from <routes> and <skills>.
|
||||
5. If no routes or skills can help, return empty lists.
|
||||
|
||||
## Response Format
|
||||
Return your answer strictly in JSON as follows:
|
||||
{{"route": ["route_name_1", "..."], "skills": ["skill_name_1", "..."]}}
|
||||
Use empty lists for `route` and/or `skills` when nothing applies.
|
||||
"#;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
|
||||
pub struct OrchestratorModelV1 {
|
||||
agent_orchestration_json_str: String,
|
||||
agent_orchestration_to_model_map: HashMap<String, String>,
|
||||
/// Pre-rendered `<skills>` block (one JSON entry per skill, name +
|
||||
/// description). Empty when no skills are attached to any route.
|
||||
skills_catalog_json_str: String,
|
||||
orchestration_model: String,
|
||||
max_token_length: usize,
|
||||
}
|
||||
|
|
@ -151,10 +188,27 @@ impl OrchestratorModelV1 {
|
|||
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
|
||||
orchestration_model: String,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
Self::with_skills(
|
||||
agent_orchestrations,
|
||||
Vec::new(),
|
||||
orchestration_model,
|
||||
max_token_length,
|
||||
)
|
||||
}
|
||||
|
||||
/// Like `new`, but additionally seeds the orchestrator with an Agent
|
||||
/// Skills catalog. When `skills_catalog` is empty the orchestrator uses
|
||||
/// the routes-only system prompt; otherwise it asks the model to also
|
||||
/// pick zero or more skills from the catalog.
|
||||
pub fn with_skills(
|
||||
agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
|
||||
skills_catalog: Vec<SkillRef>,
|
||||
orchestration_model: String,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
let agent_orchestration_values: Vec<OrchestrationPreference> =
|
||||
agent_orchestrations.values().flatten().cloned().collect();
|
||||
// Format routes: each route as JSON on its own line with standard spacing
|
||||
let agent_orchestration_json_str = agent_orchestration_values
|
||||
.iter()
|
||||
.map(to_spaced_json)
|
||||
|
|
@ -165,19 +219,48 @@ impl OrchestratorModelV1 {
|
|||
.flat_map(|(model, prefs)| prefs.iter().map(|pref| (pref.name.clone(), model.clone())))
|
||||
.collect();
|
||||
|
||||
let skills_catalog_json_str = render_skills_catalog(&skills_catalog);
|
||||
|
||||
OrchestratorModelV1 {
|
||||
orchestration_model,
|
||||
max_token_length,
|
||||
agent_orchestration_json_str,
|
||||
agent_orchestration_to_model_map,
|
||||
skills_catalog_json_str,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON shape suitable for the `<skills>` block in the orchestrator prompt:
|
||||
/// `{"name": "...", "description": "..."}`. Only metadata that helps the
|
||||
/// orchestrator pick a skill is included; the full SKILL.md body is injected
|
||||
/// separately, after a skill has been selected.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
struct SkillCatalogEntry<'a> {
|
||||
name: &'a str,
|
||||
description: &'a str,
|
||||
}
|
||||
|
||||
fn render_skills_catalog(skills: &[SkillRef]) -> String {
|
||||
skills
|
||||
.iter()
|
||||
.map(|s| SkillCatalogEntry {
|
||||
name: &s.name,
|
||||
description: s.catalog_description(),
|
||||
})
|
||||
.map(|entry| to_spaced_json(&entry))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AgentOrchestratorResponse {
|
||||
/// The route field now expects an array of route names: ["route_name_1", "route_name_2", ...]
|
||||
/// The route field expects an array of route names: ["route_name_1", "route_name_2", ...].
|
||||
pub route: Option<Vec<String>>,
|
||||
/// Optional array of Agent Skill names the orchestrator chose to activate.
|
||||
/// Absent or empty when no skills should be loaded for this turn.
|
||||
#[serde(default)]
|
||||
pub skills: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
const TOKEN_LENGTH_DIVISOR: usize = 4; // Approximate token length divisor for UTF-8 characters
|
||||
|
|
@ -209,7 +292,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
// Ensure the conversation does not exceed the configured token budget.
|
||||
// We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
|
||||
// avoid running a real tokenizer on the hot path.
|
||||
let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
|
||||
let template_len = if self.skills_catalog_json_str.is_empty() {
|
||||
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
|
||||
} else {
|
||||
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS.len()
|
||||
+ self.skills_catalog_json_str.len()
|
||||
};
|
||||
let mut token_count = template_len / TOKEN_LENGTH_DIVISOR;
|
||||
let mut selected_messages_list_reversed: Vec<Message> = vec![];
|
||||
for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
|
||||
let message_text = message.content.extract_text();
|
||||
|
|
@ -289,14 +378,16 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
// Generate the orchestrator request message based on the usage preferences.
|
||||
// If preferences are passed in request then we use them;
|
||||
// Otherwise, we use the default orchestration modelpreferences.
|
||||
let orchestrator_message =
|
||||
match convert_to_orchestrator_preferences(usage_preferences_from_request) {
|
||||
Some(prefs) => generate_orchestrator_message(&prefs, &selected_conversation_list),
|
||||
None => generate_orchestrator_message(
|
||||
&self.agent_orchestration_json_str,
|
||||
&selected_conversation_list,
|
||||
),
|
||||
};
|
||||
let routes_block = match convert_to_orchestrator_preferences(usage_preferences_from_request)
|
||||
{
|
||||
Some(prefs) => prefs,
|
||||
None => self.agent_orchestration_json_str.clone(),
|
||||
};
|
||||
let orchestrator_message = generate_orchestrator_message(
|
||||
&routes_block,
|
||||
&self.skills_catalog_json_str,
|
||||
&selected_conversation_list,
|
||||
);
|
||||
|
||||
ChatCompletionsRequest {
|
||||
model: self.orchestration_model.clone(),
|
||||
|
|
@ -316,7 +407,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
&self,
|
||||
content: &str,
|
||||
usage_preferences: &Option<Vec<AgentUsagePreference>>,
|
||||
) -> Result<Option<Vec<(String, String)>>> {
|
||||
) -> Result<Option<OrchestratorSelection>> {
|
||||
if content.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
@ -326,20 +417,21 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
|
||||
let selected_routes = orchestrator_response.route.unwrap_or_default();
|
||||
|
||||
// Filter out empty routes
|
||||
let valid_routes: Vec<String> = selected_routes
|
||||
.into_iter()
|
||||
.filter(|route| !route.is_empty())
|
||||
.collect();
|
||||
|
||||
if valid_routes.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let selected_skills: Vec<String> = orchestrator_response
|
||||
.skills
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
|
||||
let mut result: Vec<(String, String)> = Vec::new();
|
||||
let mut routes: Vec<(String, String)> = Vec::new();
|
||||
|
||||
if let Some(usage_preferences) = usage_preferences {
|
||||
// If usage preferences are defined, we need to find the model that matches each selected route
|
||||
for selected_route in valid_routes {
|
||||
let model_name: Option<String> = usage_preferences
|
||||
.iter()
|
||||
|
|
@ -351,7 +443,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
.map(|pref| pref.model.clone());
|
||||
|
||||
if let Some(model_name) = model_name {
|
||||
result.push((selected_route, model_name));
|
||||
routes.push((selected_route, model_name));
|
||||
} else {
|
||||
warn!(
|
||||
route = %selected_route,
|
||||
|
|
@ -361,14 +453,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// If no usage preferences are passed in request then use the default orchestration model preferences
|
||||
for selected_route in valid_routes {
|
||||
if let Some(model) = self
|
||||
.agent_orchestration_to_model_map
|
||||
.get(&selected_route)
|
||||
.cloned()
|
||||
{
|
||||
result.push((selected_route, model));
|
||||
routes.push((selected_route, model));
|
||||
} else {
|
||||
warn!(
|
||||
route = %selected_route,
|
||||
|
|
@ -379,11 +470,14 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
}
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
if routes.is_empty() && selected_skills.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(Some(result))
|
||||
Ok(Some(OrchestratorSelection {
|
||||
routes,
|
||||
skills: selected_skills,
|
||||
}))
|
||||
}
|
||||
|
||||
fn get_model_name(&self) -> String {
|
||||
|
|
@ -391,7 +485,11 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
|
||||
fn generate_orchestrator_message(
|
||||
prefs: &str,
|
||||
skills_catalog: &str,
|
||||
selected_conversation_list: &Vec<Message>,
|
||||
) -> String {
|
||||
// Format conversation with 4-space indentation (equivalent to Python's json.dumps(obj, indent=4))
|
||||
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
|
||||
let mut conversation_buf = Vec::new();
|
||||
|
|
@ -399,9 +497,19 @@ fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<M
|
|||
SerializeTrait::serialize(&selected_conversation_list, &mut serializer).unwrap();
|
||||
let conversation_json = String::from_utf8(conversation_buf).unwrap_or_default();
|
||||
|
||||
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
|
||||
let template = if skills_catalog.is_empty() {
|
||||
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
|
||||
} else {
|
||||
ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS
|
||||
};
|
||||
|
||||
let mut out = template
|
||||
.replace("{routes}", prefs)
|
||||
.replace("{conversation}", &conversation_json)
|
||||
.replace("{conversation}", &conversation_json);
|
||||
if !skills_catalog.is_empty() {
|
||||
out = out.replace("{skills}", skills_catalog);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn convert_to_orchestrator_preferences(
|
||||
|
|
@ -1349,23 +1457,30 @@ If no routes are needed, return an empty list for `route`.
|
|||
let orchestrator =
|
||||
OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), 2000);
|
||||
|
||||
fn routes(pairs: &[(&str, &str)]) -> OrchestratorSelection {
|
||||
OrchestratorSelection {
|
||||
routes: pairs
|
||||
.iter()
|
||||
.map(|(r, m)| (r.to_string(), m.to_string()))
|
||||
.collect(),
|
||||
skills: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// Case 1: Valid JSON with single route in array
|
||||
let input = r#"{"route": ["Image generation"]}"#;
|
||||
let result = orchestrator.parse_response(input, &None).unwrap();
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
|
||||
);
|
||||
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
|
||||
|
||||
// Case 2: Valid JSON with multiple routes in array
|
||||
let input = r#"{"route": ["Image generation", "Code generation"]}"#;
|
||||
let result = orchestrator.parse_response(input, &None).unwrap();
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(vec![
|
||||
("Image generation".to_string(), "gpt-4o".to_string()),
|
||||
("Code generation".to_string(), "gpt-4o".to_string())
|
||||
])
|
||||
Some(routes(&[
|
||||
("Image generation", "gpt-4o"),
|
||||
("Code generation", "gpt-4o")
|
||||
]))
|
||||
);
|
||||
|
||||
// Case 3: Valid JSON with empty array
|
||||
|
|
@ -1396,14 +1511,65 @@ If no routes are needed, return an empty list for `route`.
|
|||
// Case 7: Single quotes and \n in JSON
|
||||
let input = "{'route': ['Image generation']}\\n";
|
||||
let result = orchestrator.parse_response(input, &None).unwrap();
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
|
||||
);
|
||||
assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));
|
||||
|
||||
// Case 8: Array with unknown route (not in orchestrations map)
|
||||
let input = r#"{"route": ["Unknown route"]}"#;
|
||||
let result = orchestrator.parse_response(input, &None).unwrap();
|
||||
assert_eq!(result, None);
|
||||
|
||||
// Case 9: Routes plus selected skills are propagated through.
|
||||
let input = r#"{"route": ["Image generation"], "skills": ["pdf-processing"]}"#;
|
||||
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
|
||||
assert_eq!(result.routes.len(), 1);
|
||||
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
|
||||
|
||||
// Case 10: Skills-only selection (no routes) still surfaces as Some.
|
||||
let input = r#"{"route": [], "skills": ["pdf-processing"]}"#;
|
||||
let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
|
||||
assert!(result.routes.is_empty());
|
||||
assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_system_prompt_with_skills_block() {
|
||||
let orchestrator = OrchestratorModelV1::with_skills(
|
||||
HashMap::from([(
|
||||
"gpt-4o".to_string(),
|
||||
vec![OrchestrationPreference {
|
||||
name: "Image generation".to_string(),
|
||||
description: "generating image".to_string(),
|
||||
}],
|
||||
)]),
|
||||
vec![SkillRef {
|
||||
name: "pdf-processing".to_string(),
|
||||
description: "Extract structured data from PDFs.".to_string(),
|
||||
path: None,
|
||||
base_dir: None,
|
||||
body: None,
|
||||
scope: None,
|
||||
compatibility: None,
|
||||
license: None,
|
||||
metadata: None,
|
||||
allowed_tools: None,
|
||||
}],
|
||||
"test-model".to_string(),
|
||||
usize::MAX,
|
||||
);
|
||||
|
||||
let conversation: Vec<Message> = serde_json::from_str(
|
||||
r#"[{"role": "user", "content": "extract the invoice totals from this pdf"}]"#,
|
||||
)
|
||||
.unwrap();
|
||||
let req = orchestrator.generate_request(&conversation, &None);
|
||||
let prompt = req.messages[0].content.extract_text();
|
||||
|
||||
assert!(prompt.contains("<skills>"));
|
||||
assert!(prompt.contains("</skills>"));
|
||||
assert!(prompt.contains(r#""name": "pdf-processing""#));
|
||||
assert!(prompt.contains("Extract structured data from PDFs."));
|
||||
// Response format documentation must mention the `skills` array
|
||||
// so the orchestrator emits it.
|
||||
assert!(prompt.contains(r#""skills""#));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ mod tests {
|
|||
"openai/gpt-4o".to_string(),
|
||||
"openai/gpt-4o-mini".to_string(),
|
||||
],
|
||||
skills: None,
|
||||
selection_policy: SelectionPolicy {
|
||||
prefer: SelectionPreference::None,
|
||||
},
|
||||
|
|
@ -44,6 +45,7 @@ mod tests {
|
|||
"anthropic/claude-3-sonnet".to_string(),
|
||||
"openai/gpt-4o-mini".to_string(),
|
||||
],
|
||||
skills: None,
|
||||
selection_policy: SelectionPolicy {
|
||||
prefer: SelectionPreference::None,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -163,6 +163,12 @@ pub struct TopLevelRoutingPreference {
|
|||
pub name: String,
|
||||
pub description: String,
|
||||
pub models: Vec<String>,
|
||||
/// Agent Skills associated with this route. When Plano-Orchestrator
|
||||
/// selects this route, every skill listed here is also offered to the
|
||||
/// orchestrator in the `<skills>` block; selected skills have their
|
||||
/// SKILL.md body prepended to the upstream system prompt.
|
||||
#[serde(default)]
|
||||
pub skills: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
pub selection_policy: SelectionPolicy,
|
||||
}
|
||||
|
|
@ -224,6 +230,17 @@ pub struct Configuration {
|
|||
pub state_storage: Option<StateStorageConfig>,
|
||||
pub routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
|
||||
pub model_metrics_sources: Option<Vec<MetricsSource>>,
|
||||
/// Agent Skills (https://agentskills.io) installed for this project.
|
||||
///
|
||||
/// The Plano CLI discovers `.plano/skills/<name>/SKILL.md` files at render
|
||||
/// time and materializes them into this list with `body` already loaded so
|
||||
/// downstream consumers do not need filesystem access. Skills are scoped
|
||||
/// to specific routes via `routing_preferences[].skills`; Plano-Orchestrator
|
||||
/// receives a `<skills>` block alongside `<routes>` for any skills attached
|
||||
/// to candidate routes, and selected skills have their SKILL.md body
|
||||
/// injected into the upstream system prompt.
|
||||
#[serde(default)]
|
||||
pub skills: Option<Vec<SkillRef>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -611,6 +628,45 @@ pub struct PromptTarget {
|
|||
pub auto_llm_dispatch_on_response: Option<bool>,
|
||||
}
|
||||
|
||||
/// An Agent Skill (https://agentskills.io) as materialized by the Plano CLI.
|
||||
///
|
||||
/// At runtime brightstaff and the WASM filters reason over the catalog
|
||||
/// (`name` + `description`) and, when a skill is selected, inject the
|
||||
/// pre-loaded `body` into the downstream system prompt.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct SkillRef {
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub path: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub base_dir: Option<String>,
|
||||
/// Full SKILL.md markdown body (post-frontmatter). Inlined here at render
|
||||
/// time so the WASM sandbox does not need filesystem access.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub body: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub scope: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub compatibility: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub license: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<HashMap<String, String>>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub allowed_tools: Option<String>,
|
||||
}
|
||||
|
||||
impl SkillRef {
|
||||
/// Best-effort short summary suitable for the `<skills>` block sent to
|
||||
/// Plano-Orchestrator: only the public-facing description, never the
|
||||
/// full SKILL.md body. The body is injected separately, after a skill
|
||||
/// has been selected.
|
||||
pub fn catalog_description(&self) -> &str {
|
||||
&self.description
|
||||
}
|
||||
}
|
||||
|
||||
// convert PromptTarget to ChatCompletionTool
|
||||
impl From<&PromptTarget> for ChatCompletionTool {
|
||||
fn from(val: &PromptTarget) -> Self {
|
||||
|
|
@ -807,4 +863,34 @@ disable_signals: false
|
|||
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
|
||||
assert_eq!(overrides.disable_signals, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_level_routing_preference_skills_deserialize() {
|
||||
let yaml = r#"
|
||||
name: code review
|
||||
description: reviewing, analyzing, and suggesting improvements to existing code
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
skills:
|
||||
- code-review-skill
|
||||
"#;
|
||||
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
|
||||
assert_eq!(pref.name, "code review");
|
||||
assert_eq!(
|
||||
pref.skills.as_deref(),
|
||||
Some(&["code-review-skill".to_string()][..])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_level_routing_preference_skills_optional() {
|
||||
let yaml = r#"
|
||||
name: code generation
|
||||
description: generating new code
|
||||
models:
|
||||
- openai/gpt-4o
|
||||
"#;
|
||||
let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
|
||||
assert!(pref.skills.is_none());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ pub mod path;
|
|||
pub mod pii;
|
||||
pub mod ratelimit;
|
||||
pub mod routing;
|
||||
pub mod skills_runtime;
|
||||
pub mod stats;
|
||||
pub mod tokenizer;
|
||||
pub mod traces;
|
||||
|
|
|
|||
215
crates/common/src/skills_runtime.rs
Normal file
215
crates/common/src/skills_runtime.rs
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
//! Runtime helpers for handling Agent Skills selected by Plano-Orchestrator.
|
||||
//!
|
||||
//! These functions live in `common` (rather than `brightstaff` or a WASM
|
||||
//! crate) so they can be unit-tested on the native target without dragging
|
||||
//! in proxy-wasm host-call symbols or tokio runtime dependencies.
|
||||
|
||||
use crate::configuration::{SkillRef, TopLevelRoutingPreference};
|
||||
|
||||
/// Filter `skills` down to the subset attached to `route_name` via
|
||||
/// `routing_preferences[].skills`. When the selected route has no `skills:`
|
||||
/// list, returns an empty vector — skills are scoped to routes, not global.
|
||||
///
|
||||
/// `routing_preferences` is the source of truth for which skills are even
|
||||
/// eligible for the orchestrator to activate on a given route.
|
||||
pub fn skills_for_route<'a>(
|
||||
skills: &'a [SkillRef],
|
||||
routing_preferences: &[TopLevelRoutingPreference],
|
||||
route_name: &str,
|
||||
) -> Vec<&'a SkillRef> {
|
||||
let Some(route) = routing_preferences.iter().find(|p| p.name == route_name) else {
|
||||
return Vec::new();
|
||||
};
|
||||
let Some(allow) = route.skills.as_ref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut out: Vec<&SkillRef> = Vec::with_capacity(allow.len());
|
||||
for name in allow {
|
||||
if let Some(skill) = skills.iter().find(|s| &s.name == name) {
|
||||
out.push(skill);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s.
|
||||
/// Unknown names are dropped silently — the orchestrator can hallucinate.
|
||||
/// Results are deduplicated by name, preserving the order Plano-Orchestrator
|
||||
/// returned.
|
||||
pub fn resolve_selected_skills<'a>(
|
||||
skills: &'a [SkillRef],
|
||||
selected_names: &[String],
|
||||
) -> Vec<&'a SkillRef> {
|
||||
let mut out: Vec<&SkillRef> = Vec::with_capacity(selected_names.len());
|
||||
for name in selected_names {
|
||||
if out.iter().any(|s| &s.name == name) {
|
||||
continue;
|
||||
}
|
||||
if let Some(skill) = skills.iter().find(|s| &s.name == name) {
|
||||
out.push(skill);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Append the bodies of activated skills to a system prompt, wrapped in
|
||||
/// `<skill_content name="...">` tags so a future context-management pass can
|
||||
/// recognize and recompact them.
|
||||
///
|
||||
/// Returns `None` only if no base system prompt was supplied and no skills
|
||||
/// were activated. When skills are present the wrapper text always appears so
|
||||
/// the downstream model receives a clear, well-structured instruction block.
|
||||
pub fn augment_system_prompt_with_skills(
|
||||
base_system_prompt: Option<String>,
|
||||
activated_skills: &[&SkillRef],
|
||||
) -> Option<String> {
|
||||
if activated_skills.is_empty() {
|
||||
return base_system_prompt;
|
||||
}
|
||||
let mut buf = String::new();
|
||||
if let Some(base) = base_system_prompt {
|
||||
if !base.is_empty() {
|
||||
buf.push_str(&base);
|
||||
buf.push('\n');
|
||||
buf.push('\n');
|
||||
}
|
||||
}
|
||||
buf.push_str(
|
||||
"The following Agent Skills have been activated for this request. \
|
||||
Follow their instructions when relevant; resolve relative paths \
|
||||
against each skill's base directory.\n\n",
|
||||
);
|
||||
for skill in activated_skills {
|
||||
buf.push_str(&format!("<skill_content name=\"{}\"", skill.name));
|
||||
if let Some(base_dir) = skill.base_dir.as_deref() {
|
||||
buf.push_str(&format!(" base_dir=\"{}\"", base_dir));
|
||||
}
|
||||
buf.push_str(">\n");
|
||||
if let Some(body) = skill.body.as_deref() {
|
||||
buf.push_str(body.trim_end());
|
||||
buf.push('\n');
|
||||
} else {
|
||||
buf.push_str(&format!("(skill description) {}\n", skill.description));
|
||||
}
|
||||
buf.push_str("</skill_content>\n\n");
|
||||
}
|
||||
Some(buf.trim_end().to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::configuration::SelectionPolicy;
|
||||
|
||||
fn skill(name: &str, body: &str) -> SkillRef {
|
||||
SkillRef {
|
||||
name: name.to_string(),
|
||||
description: format!("desc for {}", name),
|
||||
path: Some(format!("/skills/{}/SKILL.md", name)),
|
||||
base_dir: Some(format!("/skills/{}", name)),
|
||||
body: Some(body.to_string()),
|
||||
scope: Some("project".to_string()),
|
||||
compatibility: None,
|
||||
license: None,
|
||||
metadata: None,
|
||||
allowed_tools: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn route(name: &str, skill_names: Option<Vec<&str>>) -> TopLevelRoutingPreference {
|
||||
TopLevelRoutingPreference {
|
||||
name: name.to_string(),
|
||||
description: format!("desc for {}", name),
|
||||
models: vec!["openai/gpt-4o".to_string()],
|
||||
skills: skill_names.map(|v| v.into_iter().map(String::from).collect()),
|
||||
selection_policy: SelectionPolicy::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skills_for_route_returns_attached_skills() {
|
||||
let catalog = vec![
|
||||
skill("pdf-processing", "extract"),
|
||||
skill("code-review", "review"),
|
||||
];
|
||||
let routes = vec![
|
||||
route("code review", Some(vec!["code-review"])),
|
||||
route("doc work", Some(vec!["pdf-processing"])),
|
||||
];
|
||||
let resolved = skills_for_route(&catalog, &routes, "code review");
|
||||
assert_eq!(resolved.len(), 1);
|
||||
assert_eq!(resolved[0].name, "code-review");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skills_for_route_empty_when_route_has_no_skills_list() {
|
||||
let catalog = vec![skill("pdf-processing", "extract")];
|
||||
let routes = vec![route("code review", None)];
|
||||
let resolved = skills_for_route(&catalog, &routes, "code review");
|
||||
assert!(resolved.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skills_for_route_empty_when_route_missing() {
|
||||
let catalog = vec![skill("pdf-processing", "extract")];
|
||||
let routes = vec![route("code review", Some(vec!["pdf-processing"]))];
|
||||
let resolved = skills_for_route(&catalog, &routes, "no-such-route");
|
||||
assert!(resolved.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skills_for_route_drops_unknown_skill_names() {
|
||||
let catalog = vec![skill("pdf-processing", "extract")];
|
||||
let routes = vec![route(
|
||||
"code review",
|
||||
Some(vec!["pdf-processing", "ghost-skill"]),
|
||||
)];
|
||||
let resolved = skills_for_route(&catalog, &routes, "code review");
|
||||
assert_eq!(resolved.len(), 1);
|
||||
assert_eq!(resolved[0].name, "pdf-processing");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_selected_skills_drops_unknown_and_dedupes() {
|
||||
let catalog = vec![
|
||||
skill("pdf-processing", "extract"),
|
||||
skill("code-review", "review"),
|
||||
];
|
||||
let names = vec![
|
||||
"code-review".to_string(),
|
||||
"ghost".to_string(),
|
||||
"code-review".to_string(),
|
||||
"pdf-processing".to_string(),
|
||||
];
|
||||
let resolved = resolve_selected_skills(&catalog, &names);
|
||||
assert_eq!(resolved.len(), 2);
|
||||
assert_eq!(resolved[0].name, "code-review");
|
||||
assert_eq!(resolved[1].name, "pdf-processing");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn augment_passthrough_with_no_skills() {
|
||||
let augmented = augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[]);
|
||||
assert_eq!(augmented.as_deref(), Some("you are helpful"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn augment_includes_skill_bodies() {
|
||||
let s = skill("pdf-processing", "extract text and tables");
|
||||
let augmented =
|
||||
augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[&s])
|
||||
.expect("augmented");
|
||||
assert!(augmented.starts_with("you are helpful"));
|
||||
assert!(augmented.contains("<skill_content name=\"pdf-processing\""));
|
||||
assert!(augmented.contains("extract text and tables"));
|
||||
assert!(augmented.contains("base_dir=\"/skills/pdf-processing\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn augment_without_base_prompt_still_works() {
|
||||
let s = skill("code-review", "look at diffs");
|
||||
let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
|
||||
assert!(augmented.contains("<skill_content name=\"code-review\""));
|
||||
assert!(augmented.contains("look at diffs"));
|
||||
}
|
||||
}
|
||||
|
|
@ -209,19 +209,15 @@ impl StreamContext {
|
|||
} else {
|
||||
info!("no default prompt target found, forwarding request to upstream llm");
|
||||
let mut messages = Vec::new();
|
||||
// add system prompt
|
||||
match self.system_prompt.as_ref() {
|
||||
None => {}
|
||||
Some(system_prompt) => {
|
||||
let system_prompt_message = Message {
|
||||
role: SYSTEM_ROLE.to_string(),
|
||||
content: Some(ContentType::Text(system_prompt.clone())),
|
||||
model: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
};
|
||||
messages.push(system_prompt_message);
|
||||
}
|
||||
if let Some(system_prompt) = self.system_prompt.as_ref().clone() {
|
||||
let system_prompt_message = Message {
|
||||
role: SYSTEM_ROLE.to_string(),
|
||||
content: Some(ContentType::Text(system_prompt)),
|
||||
model: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
};
|
||||
messages.push(system_prompt_message);
|
||||
}
|
||||
|
||||
messages.append(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue