From 5e8d27fd3c088d40a1c1ffb8697f784a4ce10ccf Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Mon, 18 May 2026 12:39:21 -0700 Subject: [PATCH] fix(skills): honor skills-only orchestrator decisions, dedupe runtime helpers, warn on dropped picks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the code-review findings on 7f5bf641: - Honor skills-only decisions: RouteDecision.route_name is now Option and the orchestrator emits a decision when routes is empty but skills is non-empty. The LLM handler falls back to the originally-requested model and still injects activated skill bodies, matching the contract in docs/source/resources/skills.rst. - Warn on allow-list misses: resolve_for_route now returns a SkillResolution that splits drops into "not allow-listed for this route" vs "not in catalog (hallucinated)". brightstaff logs each bucket so misconfigured routing_preferences[].skills lists become visible instead of vanishing silently. - Consolidate runtime: common::skills_runtime is now the single source of truth (referenced_skills_catalog, resolve_for_route, resolve_selected_skills, augment_system_prompt_with_skills). brightstaff drops its local re-implementations and calls into common. - Tests: 11 new tests in common::skills_runtime (catalog union, allow-list intersection, dedup, hallucination handling, XML escaping, body size cap) and 6 new tests in brightstaff::handlers::llm::model_selection cover inject_activated_skills_into_request, including the first-system-message rule and the Parts->Text flatten — both now documented on the function. - Cap skill body size at 32 KiB with a UTF-8-safe tail-trim + marker so an oversized SKILL.md cannot blow the downstream context window. - XML-escape skill name and base_dir in the wrapper as defense-in-depth (names are validated upstream, but the wrapper sits inside the system prompt). - Bound find_project_root at \$HOME plus a 30-parent depth cap so CLI invocations outside HOME no longer walk to /. --- cli/planoai/skills.py | 51 ++- .../src/handlers/llm/model_selection.rs | 227 +++++++++++- crates/brightstaff/src/router/orchestrator.rs | 174 ++++++---- crates/common/src/skills_runtime.rs | 326 ++++++++++++++---- 4 files changed, 639 insertions(+), 139 deletions(-) diff --git a/cli/planoai/skills.py b/cli/planoai/skills.py index f8a4a982..367fde85 100644 --- a/cli/planoai/skills.py +++ b/cli/planoai/skills.py @@ -102,24 +102,55 @@ class Skill: } -def find_project_root(start: Path | None = None) -> Path: - """Walk up from `start` looking for `.plano/`, then `.git/`. +_MAX_PROJECT_ROOT_WALK_DEPTH = 30 - Falls back to `start` (or cwd) if nothing is found. This matches how - `npx skills add` chooses a project root. + +def find_project_root(start: Path | None = None) -> Path: + """Walk up from ``start`` looking for ``.plano/``, then ``.git/``. + + The walk is bounded so a CLI invocation in a deeply-nested or + pathological directory does not iterate all the way to ``/`` on every + call. Two bounds apply, whichever fires first: + + * **$HOME**: when ``start`` is inside the user's home directory, the + walk stops at ``$HOME`` itself. We never inspect siblings of + ``$HOME`` like ``/Users`` — picking up a stray ``.git/`` there would + be more surprising than helpful. + * **Hard depth cap** (``_MAX_PROJECT_ROOT_WALK_DEPTH`` parents): a + defensive fallback for paths outside ``$HOME`` (e.g. ``/tmp/...``) + so we still terminate quickly on absurdly deep trees. + + Falls back to ``start`` (or cwd) if nothing is found. This matches how + ``npx skills add`` chooses a project root. """ base = Path(start or Path.cwd()).resolve() - cur = base - while cur != cur.parent: + + try: + home = Path(os.path.expanduser("~")).resolve() + except (OSError, RuntimeError): + home = None + + def _ancestors(start_dir: Path) -> list[Path]: + out: list[Path] = [] + cur = start_dir + for _ in range(_MAX_PROJECT_ROOT_WALK_DEPTH + 1): + out.append(cur) + if home is not None and cur == home: + break + if cur == cur.parent: + break + cur = cur.parent + return out + + ancestors = _ancestors(base) + + for cur in ancestors: if (cur / ".plano").exists(): return cur - cur = cur.parent - cur = base - while cur != cur.parent: + for cur in ancestors: if (cur / ".git").exists(): return cur - cur = cur.parent return base diff --git a/crates/brightstaff/src/handlers/llm/model_selection.rs b/crates/brightstaff/src/handlers/llm/model_selection.rs index 5418c584..088bb288 100644 --- a/crates/brightstaff/src/handlers/llm/model_selection.rs +++ b/crates/brightstaff/src/handlers/llm/model_selection.rs @@ -137,6 +137,34 @@ pub async fn router_chat_get_upstream_model( match routing_result { Ok(route) => match route { Some(decision) => { + // Skills-only decision (no route, no models) -> fall through + // to the "none" sentinel so the original / aliased model is + // used, but propagate activated_skills so they still get + // injected. Documented at docs/source/resources/skills.rst. + if decision.route_name.is_none() && decision.models.is_empty() { + current_span.record("route.selected_model", "none"); + info!( + skills = ?decision + .activated_skills + .iter() + .map(|s| s.name.as_str()) + .collect::>(), + "no route determined; activating skills against default model" + ); + bs_metrics::record_router_decision( + route_label, + "none", + true, + determination_elapsed, + ); + return Ok(RoutingResult { + model_name: "none".to_string(), + models: vec!["none".to_string()], + route_name: None, + activated_skills: decision.activated_skills, + }); + } + let model_name = decision.models.first().cloned().unwrap_or_default(); current_span.record("route.selected_model", model_name.as_str()); bs_metrics::record_router_decision( @@ -148,7 +176,7 @@ pub async fn router_chat_get_upstream_model( Ok(RoutingResult { model_name, models: decision.models, - route_name: Some(decision.route_name), + route_name: decision.route_name, activated_skills: decision.activated_skills, }) } @@ -186,11 +214,29 @@ pub async fn router_chat_get_upstream_model( /// Prepend the bodies of `activated_skills` to the system prompt of the /// upstream request so the chosen LLM has access to each skill's instructions. /// Works across every provider variant by going through the OpenAI message -/// shape (`get_messages`/`set_messages`). +/// shape (`get_messages` / `set_messages`). /// -/// When there is already a leading system message we augment it in place; -/// otherwise a new system message is inserted at position 0. No-op when -/// `activated_skills` is empty. +/// # Behavior contract +/// +/// * **No-op** when `activated_skills` is empty. +/// * **Augments the first system message in place** when one is present at +/// any position in `messages` (typically index 0, but we look for the +/// first `Role::System` rather than assuming). Subsequent system messages +/// (rare but legal for some providers) are left untouched. We pick "first" +/// so the skill content appears as early in the prompt as possible — +/// models weight earlier system content more heavily and an Anthropic +/// tools+system combo is conventionally a single leading block. +/// * **Inserts a new leading system message** at index 0 when no system +/// message exists in the request. +/// * **Flattens `MessageContent::Parts` system content to a single +/// `MessageContent::Text`** when extracting the base prompt. This is +/// intentional: every supported upstream API accepts text in system +/// messages, and the alternative — preserving each `ContentPart` and +/// appending a new text part — fails on providers that disallow +/// multi-part system content. The trade-off is that non-text system parts +/// (e.g. images attached to a system message, which no production +/// provider supports anyway) are dropped on the floor. Verified by +/// `flattens_parts_system_content` below. pub fn inject_activated_skills_into_request( client_request: &mut ProviderRequestType, activated_skills: &[SkillRef], @@ -240,3 +286,174 @@ pub fn inject_activated_skills_into_request( client_request.set_messages(&messages); } + +#[cfg(test)] +mod tests { + use super::*; + use hermesllm::apis::openai::{ChatCompletionsRequest, ContentPart}; + + fn req_with_messages(msgs: Vec) -> ProviderRequestType { + ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest { + model: "test".to_string(), + messages: msgs, + ..Default::default() + }) + } + + fn user_msg(text: &str) -> Message { + Message { + role: Role::User, + content: Some(MessageContent::Text(text.to_string())), + name: None, + tool_calls: None, + tool_call_id: None, + } + } + + fn system_msg(text: &str) -> Message { + Message { + role: Role::System, + content: Some(MessageContent::Text(text.to_string())), + name: None, + tool_calls: None, + tool_call_id: None, + } + } + + fn skill(name: &str, body: &str) -> SkillRef { + SkillRef { + name: name.to_string(), + description: format!("desc for {name}"), + path: None, + base_dir: None, + body: Some(body.to_string()), + scope: Some("project".to_string()), + compatibility: None, + license: None, + metadata: None, + allowed_tools: None, + } + } + + fn first_system_text(req: &ProviderRequestType) -> String { + req.get_messages() + .iter() + .find(|m| m.role == Role::System) + .and_then(|m| m.content.as_ref()) + .map(|c| c.extract_text()) + .unwrap_or_default() + } + + #[test] + fn injects_new_system_message_when_none_present() { + let mut req = req_with_messages(vec![user_msg("hi")]); + inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]); + let messages = req.get_messages(); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].role, Role::System); + let txt = first_system_text(&req); + assert!(txt.contains(" { + assert!(t.contains("be brief")); + assert!(t.contains(" and polite")); + assert!(t.contains(" panic!("expected flattened text, got Parts"), + } + } + + #[test] + fn injects_in_orchestrator_order_for_multiple_skills() { + let mut req = req_with_messages(vec![user_msg("hi")]); + inject_activated_skills_into_request( + &mut req, + &[skill("first", "alpha-body"), skill("second", "beta-body")], + ); + let txt = first_system_text(&req); + let first_pos = txt.find("alpha-body").expect("first skill body present"); + let second_pos = txt.find("beta-body").expect("second skill body present"); + assert!( + first_pos < second_pos, + "skills should appear in the order they were activated" + ); + } +} diff --git a/crates/brightstaff/src/router/orchestrator.rs b/crates/brightstaff/src/router/orchestrator.rs index 31bf8e99..f27001c8 100644 --- a/crates/brightstaff/src/router/orchestrator.rs +++ b/crates/brightstaff/src/router/orchestrator.rs @@ -5,13 +5,14 @@ use common::{ AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference, }, consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER}, + skills_runtime::{referenced_skills_catalog, resolve_for_route, resolve_selected_skills}, }; use hermesllm::apis::openai::Message; use hyper::header; use opentelemetry::global; use opentelemetry_http::HeaderInjector; use thiserror::Error; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; use super::http::{self, post_and_extract_content}; use super::model_metrics::ModelMetricsService; @@ -41,14 +42,27 @@ pub struct OrchestratorService { tenant_header: Option, } -/// Result of `determine_route`: which route was picked, the ranked candidate -/// models for that route, and the Agent Skill bodies the orchestrator chose -/// to activate alongside it. Skills are resolved against -/// `routing_preferences[].skills`, so unknown / cross-route names are -/// silently dropped. +/// Result of `determine_route`: which route was picked (if any), the +/// ranked candidate models for that route, and the Agent Skill bodies the +/// orchestrator chose to activate alongside it. +/// +/// Two valid shapes: +/// +/// * **Route + skills (typical):** `route_name = Some(...)`, `models` +/// non-empty, `activated_skills` may be non-empty. Skills are resolved +/// against `routing_preferences[].skills`, so picks that aren't +/// allow-listed for the route are dropped with a `warn!`. +/// * **Skills-only:** `route_name = None`, `models` empty, +/// `activated_skills` non-empty. The orchestrator decided no route +/// needed to change but the user's intent matches one or more skills. +/// Per `docs/source/resources/skills.rst`, the request falls back to the +/// originally-requested model and the skill bodies are injected the +/// same way. Allow-list filtering uses the catalog union (effectively +/// the catalog itself, which is pre-filtered to skills referenced by +/// some route). #[derive(Debug, Clone, Default)] pub struct RouteDecision { - pub route_name: String, + pub route_name: Option, pub models: Vec, pub activated_skills: Vec, } @@ -141,7 +155,7 @@ impl OrchestratorService { prefs.into_iter().map(|p| (p.name.clone(), p)).collect() }); - let skills_catalog = build_skills_catalog_for_routes( + let skills_catalog = referenced_skills_catalog( skills_catalog.as_deref().unwrap_or(&[]), &top_level_preferences, ); @@ -279,6 +293,7 @@ impl OrchestratorService { } if let Some((route_name, _)) = selection.routes.first() { + // Route + (optional) skills path. let top_pref = inline_top_map .as_ref() .and_then(|m| m.get(route_name)) @@ -289,19 +304,43 @@ impl OrchestratorService { Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await, None => pref.models.clone(), }; - let activated_skills = resolve_activated_skills( + let resolution = resolve_for_route( &self.skills_catalog, pref.skills.as_deref().unwrap_or(&[]), &selection.skills, ); + log_skill_drops(route_name, &resolution); + let activated_skills: Vec = + resolution.activated.into_iter().cloned().collect(); Some(RouteDecision { - route_name: route_name.clone(), + route_name: Some(route_name.clone()), models: ranked, activated_skills, }) } else { None } + } else if !selection.skills.is_empty() { + // Skills-only path: orchestrator picked no route but flagged + // skills. Per the documented contract the request still goes + // through with the originally-requested model and the skill + // bodies are injected. The catalog itself is the effective + // allow-list (it's already the union across every route's + // allow-list, so anything in it was deemed safe to expose). + let activated: Vec = + resolve_selected_skills(&self.skills_catalog, &selection.skills) + .into_iter() + .cloned() + .collect(); + if activated.is_empty() { + None + } else { + Some(RouteDecision { + route_name: None, + models: Vec::new(), + activated_skills: activated, + }) + } } else { None } @@ -399,59 +438,28 @@ impl OrchestratorService { } } -/// Build the orchestrator-visible skills catalog (deduplicated by name) from -/// the union of every skill name referenced under -/// `routing_preferences[].skills`. Skills that are not referenced by any -/// route are excluded — they would just clutter the prompt with no way for -/// the orchestrator to attach them to a route. -fn build_skills_catalog_for_routes( - catalog: &[SkillRef], - routes: &HashMap, -) -> Vec { - let mut referenced: std::collections::HashSet<&str> = std::collections::HashSet::new(); - for route in routes.values() { - if let Some(names) = route.skills.as_ref() { - for name in names { - referenced.insert(name.as_str()); - } - } +/// Emit `warn!` for any skill names the orchestrator selected but the +/// resolver dropped. Surfacing these is critical for debuggability — a +/// silently-dropped skill is hard to diagnose, and the most common causes +/// (forgetting to add a skill to a route's allow-list, or the orchestrator +/// hallucinating a name) are both fixable once visible. +fn log_skill_drops(route_name: &str, resolution: &common::skills_runtime::SkillResolution<'_>) { + if !resolution.dropped_not_allowed.is_empty() { + warn!( + route = %route_name, + skills = ?resolution.dropped_not_allowed, + "orchestrator selected Agent Skills that are not on this route's allow-list; \ + dropping (add them to routing_preferences[].skills if you want this route to use them)" + ); } - - let mut out: Vec = Vec::new(); - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - for skill in catalog { - if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) { - out.push(skill.clone()); - } + if !resolution.dropped_unknown.is_empty() { + warn!( + route = %route_name, + skills = ?resolution.dropped_unknown, + "orchestrator selected Agent Skills that are not in the runtime catalog \ + (likely hallucinated or removed)" + ); } - out -} - -/// Filter the orchestrator-selected skill names down to the SKILL.md bodies -/// allowed for the chosen route, preserving the order the orchestrator -/// returned. Unknown names (either not in the catalog or not allowed by the -/// route) are silently dropped; the orchestrator can hallucinate. -fn resolve_activated_skills( - catalog: &[SkillRef], - route_allowlist: &[String], - selected: &[String], -) -> Vec { - let allowed: std::collections::HashSet<&str> = - route_allowlist.iter().map(String::as_str).collect(); - let mut out: Vec = Vec::with_capacity(selected.len()); - let mut taken: std::collections::HashSet<&str> = std::collections::HashSet::new(); - for name in selected { - if !allowed.contains(name.as_str()) { - continue; - } - if !taken.insert(name.as_str()) { - continue; - } - if let Some(skill) = catalog.iter().find(|s| &s.name == name) { - out.push(skill.clone()); - } - } - out } #[cfg(test)] @@ -536,6 +544,50 @@ mod tests { assert!(svc.get_cached_route("s3", None).await.is_some()); } + // ---- RouteDecision construction ---- + + fn skill_ref(name: &str) -> SkillRef { + SkillRef { + name: name.to_string(), + description: format!("desc for {name}"), + path: None, + base_dir: None, + body: Some(format!("body for {name}")), + scope: Some("project".to_string()), + compatibility: None, + license: None, + metadata: None, + allowed_tools: None, + } + } + + #[test] + fn route_decision_holds_optional_route_name_for_skills_only_path() { + // Regression guard for the docs promise at skills.rst:153-155: a + // skills-only decision must be representable, with no route_name and + // empty models, so the LLM handler falls back to the original model. + let decision = RouteDecision { + route_name: None, + models: Vec::new(), + activated_skills: vec![skill_ref("pdf")], + }; + assert!(decision.route_name.is_none()); + assert!(decision.models.is_empty()); + assert_eq!(decision.activated_skills.len(), 1); + } + + #[test] + fn log_skill_drops_does_not_panic_on_empty_resolution() { + // The logger is fire-and-forget. We can't easily assert on the + // emitted warns here without setting up a tracing subscriber, so the + // contract under test is: empty resolutions are silent (no warn + // attempt). Confidence in the warn paths comes from + // common::skills_runtime tests for resolve_for_route, which is the + // function whose dropped_* lists drive this logger. + let empty = common::skills_runtime::SkillResolution::default(); + log_skill_drops("any", &empty); + } + #[tokio::test] async fn test_cache_update_existing_session_does_not_evict() { let svc = make_orchestrator_service(600, 2); diff --git a/crates/common/src/skills_runtime.rs b/crates/common/src/skills_runtime.rs index 4e9d4aab..e8bd69ee 100644 --- a/crates/common/src/skills_runtime.rs +++ b/crates/common/src/skills_runtime.rs @@ -4,45 +4,117 @@ //! crate) so they can be unit-tested on the native target without dragging //! in proxy-wasm host-call symbols or tokio runtime dependencies. +use std::collections::{HashMap, HashSet}; + use crate::configuration::{SkillRef, TopLevelRoutingPreference}; -/// Filter `skills` down to the subset attached to `route_name` via -/// `routing_preferences[].skills`. When the selected route has no `skills:` -/// list, returns an empty vector — skills are scoped to routes, not global. -/// -/// `routing_preferences` is the source of truth for which skills are even -/// eligible for the orchestrator to activate on a given route. -pub fn skills_for_route<'a>( - skills: &'a [SkillRef], - routing_preferences: &[TopLevelRoutingPreference], - route_name: &str, -) -> Vec<&'a SkillRef> { - let Some(route) = routing_preferences.iter().find(|p| p.name == route_name) else { - return Vec::new(); - }; - let Some(allow) = route.skills.as_ref() else { - return Vec::new(); - }; - let mut out: Vec<&SkillRef> = Vec::with_capacity(allow.len()); - for name in allow { - if let Some(skill) = skills.iter().find(|s| &s.name == name) { - out.push(skill); +/// Upper bound on the byte length of a single skill body the runtime will +/// inject into an upstream system prompt. SKILL.md files are typically a few +/// kilobytes; this guard keeps a single oversized or malicious skill from +/// blowing the downstream model's context window. Bodies longer than this +/// are tail-trimmed with a marker line. ~32 KiB ≈ 8K tokens at the +/// 4-bytes-per-token heuristic used elsewhere in brightstaff. +pub const MAX_SKILL_BODY_BYTES: usize = 32 * 1024; + +const SKILL_BODY_TRUNCATION_MARKER: &str = "\n…[skill body truncated]\n"; + +/// Outcome of resolving a list of orchestrator-selected skill names against +/// a route's `skills:` allow-list and the runtime catalog. Callers should +/// emit `warn!` for each name in `dropped_not_allowed` / `dropped_unknown` +/// so misconfigured allow-lists and hallucinated picks are observable. +#[derive(Debug, Default)] +pub struct SkillResolution<'a> { + /// Skills that survived both the allow-list and catalog filters, in + /// orchestrator-selected order with duplicates removed. + pub activated: Vec<&'a SkillRef>, + /// Names the orchestrator selected that are NOT in the chosen route's + /// `skills:` allow-list. Typically a cross-route mention — the model + /// pulled a skill name from the global catalog that this route did not + /// expose. Callers should `warn!`. + pub dropped_not_allowed: Vec, + /// Names that ARE allow-listed for the route but are missing from the + /// runtime catalog (skill removed / never installed / hallucinated). + pub dropped_unknown: Vec, +} + +/// Build the orchestrator-visible skills catalog from the union of every +/// skill name referenced under `routing_preferences[].skills`. Skills not +/// referenced by any route are excluded — they would just clutter the +/// `` block with no way for the orchestrator to attach them. The +/// result preserves `catalog` order and is deduplicated by name. +pub fn referenced_skills_catalog( + catalog: &[SkillRef], + routes: &HashMap, +) -> Vec { + let mut referenced: HashSet<&str> = HashSet::new(); + for route in routes.values() { + if let Some(names) = route.skills.as_ref() { + for name in names { + referenced.insert(name.as_str()); + } + } + } + + let mut out: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + for skill in catalog { + if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) { + out.push(skill.clone()); } } out } -/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s. -/// Unknown names are dropped silently — the orchestrator can hallucinate. -/// Results are deduplicated by name, preserving the order Plano-Orchestrator -/// returned. +/// Filter `selected` skill names to those that are both (a) allow-listed +/// for the chosen route via `route_allowlist` and (b) present in `catalog`, +/// preserving orchestrator order and deduplicating. Drops are reported on +/// the `SkillResolution` struct so callers can `warn!` and surface +/// misconfiguration without re-walking the lists. +pub fn resolve_for_route<'a>( + catalog: &'a [SkillRef], + route_allowlist: &[String], + selected: &[String], +) -> SkillResolution<'a> { + let allowed: HashSet<&str> = route_allowlist.iter().map(String::as_str).collect(); + let mut activated: Vec<&SkillRef> = Vec::with_capacity(selected.len()); + let mut taken: HashSet<&str> = HashSet::new(); + let mut dropped_not_allowed: Vec = Vec::new(); + let mut dropped_unknown: Vec = Vec::new(); + for name in selected { + if !taken.insert(name.as_str()) { + continue; + } + if !allowed.contains(name.as_str()) { + dropped_not_allowed.push(name.clone()); + continue; + } + match catalog.iter().find(|s| &s.name == name) { + Some(skill) => activated.push(skill), + None => dropped_unknown.push(name.clone()), + } + } + SkillResolution { + activated, + dropped_not_allowed, + dropped_unknown, + } +} + +/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s +/// directly against the catalog, without any per-route allow-list. Use this +/// for the "skills-only" path documented in `docs/source/resources/skills.rst` +/// where the orchestrator returns skills but no route — the catalog itself +/// (already pre-filtered to skills referenced by SOME route via +/// `referenced_skills_catalog`) is the effective allow-list. Unknown names +/// are dropped silently; results are deduplicated by name preserving order. pub fn resolve_selected_skills<'a>( skills: &'a [SkillRef], selected_names: &[String], ) -> Vec<&'a SkillRef> { let mut out: Vec<&SkillRef> = Vec::with_capacity(selected_names.len()); + let mut seen: HashSet<&str> = HashSet::new(); for name in selected_names { - if out.iter().any(|s| &s.name == name) { + if !seen.insert(name.as_str()) { continue; } if let Some(skill) = skills.iter().find(|s| &s.name == name) { @@ -53,12 +125,22 @@ pub fn resolve_selected_skills<'a>( } /// Append the bodies of activated skills to a system prompt, wrapped in -/// `` tags so a future context-management pass can -/// recognize and recompact them. +/// `` tags so a +/// future context-management pass can recognize and recompact them. /// -/// Returns `None` only if no base system prompt was supplied and no skills -/// were activated. When skills are present the wrapper text always appears so -/// the downstream model receives a clear, well-structured instruction block. +/// Behavior contract (relied on by `brightstaff::handlers::llm::model_selection`): +/// +/// * Returns `None` only when no base prompt was supplied **and** no skills +/// were activated. Otherwise always returns `Some`. +/// * The base prompt (if any) is kept verbatim and the skill block is +/// appended after a blank line. +/// * Each skill body is tail-trimmed at `MAX_SKILL_BODY_BYTES` bytes (UTF-8 +/// boundary safe) with a truncation marker, so a single oversized +/// SKILL.md cannot blow the downstream context window. +/// * `name` and `base_dir` are XML-attribute-escaped (`&`, `<`, `>`, `"`) +/// so a maliciously named skill cannot break out of the wrapper. Skill +/// names are already validated upstream, but defense-in-depth matters +/// here because the wrapper is part of the LLM's system prompt. pub fn augment_system_prompt_with_skills( base_system_prompt: Option, activated_skills: &[&SkillRef], @@ -80,22 +162,66 @@ pub fn augment_system_prompt_with_skills( against each skill's base directory.\n\n", ); for skill in activated_skills { - buf.push_str(&format!("\n"); if let Some(body) = skill.body.as_deref() { - buf.push_str(body.trim_end()); + buf.push_str(&truncate_skill_body(body)); buf.push('\n'); } else { - buf.push_str(&format!("(skill description) {}\n", skill.description)); + buf.push_str(&format!( + "(skill description) {}\n", + xml_attr_escape(&skill.description) + )); } buf.push_str("\n\n"); } Some(buf.trim_end().to_string()) } +/// Escape a string for use inside an XML attribute value (double-quoted). +/// Quotes `&`, `<`, `>`, and `"`; leaves single quotes alone since the +/// wrapper always uses double quotes. +fn xml_attr_escape(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for ch in s.chars() { + match ch { + '&' => out.push_str("&"), + '<' => out.push_str("<"), + '>' => out.push_str(">"), + '"' => out.push_str("""), + _ => out.push(ch), + } + } + out +} + +/// Tail-trim `body` to at most `MAX_SKILL_BODY_BYTES` bytes, respecting +/// UTF-8 character boundaries. Appends a marker so the downstream model +/// can tell content was dropped. Pass-through for short bodies. +fn truncate_skill_body(body: &str) -> String { + let trimmed = body.trim_end(); + if trimmed.len() <= MAX_SKILL_BODY_BYTES { + return trimmed.to_string(); + } + // Reserve room for the marker so the final length is still within the + // budget even when the marker is added. + let budget = MAX_SKILL_BODY_BYTES.saturating_sub(SKILL_BODY_TRUNCATION_MARKER.len()); + let mut end = budget; + while end > 0 && !trimmed.is_char_boundary(end) { + end -= 1; + } + let mut out = String::with_capacity(end + SKILL_BODY_TRUNCATION_MARKER.len()); + out.push_str(&trimmed[..end]); + out.push_str(SKILL_BODY_TRUNCATION_MARKER); + out +} + #[cfg(test)] mod tests { use super::*; @@ -126,49 +252,93 @@ mod tests { } } + fn routes_map( + routes: Vec, + ) -> HashMap { + routes.into_iter().map(|r| (r.name.clone(), r)).collect() + } + + // --- referenced_skills_catalog --- + #[test] - fn skills_for_route_returns_attached_skills() { + fn referenced_catalog_is_union_across_routes() { let catalog = vec![ - skill("pdf-processing", "extract"), + skill("pdf", "extract"), skill("code-review", "review"), + skill("never-used", "x"), ]; - let routes = vec![ - route("code review", Some(vec!["code-review"])), - route("doc work", Some(vec!["pdf-processing"])), - ]; - let resolved = skills_for_route(&catalog, &routes, "code review"); - assert_eq!(resolved.len(), 1); - assert_eq!(resolved[0].name, "code-review"); + let routes = routes_map(vec![ + route("docs", Some(vec!["pdf"])), + route("review", Some(vec!["code-review"])), + route("other", None), + ]); + let out = referenced_skills_catalog(&catalog, &routes); + let names: Vec<_> = out.iter().map(|s| s.name.as_str()).collect(); + assert!(names.contains(&"pdf")); + assert!(names.contains(&"code-review")); + assert!(!names.contains(&"never-used")); } #[test] - fn skills_for_route_empty_when_route_has_no_skills_list() { - let catalog = vec![skill("pdf-processing", "extract")]; - let routes = vec![route("code review", None)]; - let resolved = skills_for_route(&catalog, &routes, "code review"); - assert!(resolved.is_empty()); + fn referenced_catalog_deduplicates_when_multiple_routes_share_a_skill() { + let catalog = vec![skill("pdf", "extract")]; + let routes = routes_map(vec![ + route("a", Some(vec!["pdf"])), + route("b", Some(vec!["pdf"])), + ]); + let out = referenced_skills_catalog(&catalog, &routes); + assert_eq!(out.len(), 1); + } + + // --- resolve_for_route --- + + #[test] + fn resolve_for_route_keeps_allowlisted_skills_in_orchestrator_order() { + let catalog = vec![skill("a", ""), skill("b", ""), skill("c", "")]; + let allow = vec!["a".to_string(), "b".to_string(), "c".to_string()]; + let selected = vec!["c".to_string(), "a".to_string()]; + let r = resolve_for_route(&catalog, &allow, &selected); + let names: Vec<_> = r.activated.iter().map(|s| s.name.as_str()).collect(); + assert_eq!(names, vec!["c", "a"]); + assert!(r.dropped_not_allowed.is_empty()); + assert!(r.dropped_unknown.is_empty()); } #[test] - fn skills_for_route_empty_when_route_missing() { - let catalog = vec![skill("pdf-processing", "extract")]; - let routes = vec![route("code review", Some(vec!["pdf-processing"]))]; - let resolved = skills_for_route(&catalog, &routes, "no-such-route"); - assert!(resolved.is_empty()); + fn resolve_for_route_drops_cross_route_skill_into_not_allowed() { + let catalog = vec![skill("pdf", ""), skill("payment", "")]; + let allow = vec!["pdf".to_string()]; // route only allows pdf + let selected = vec!["pdf".to_string(), "payment".to_string()]; + let r = resolve_for_route(&catalog, &allow, &selected); + assert_eq!(r.activated.len(), 1); + assert_eq!(r.activated[0].name, "pdf"); + assert_eq!(r.dropped_not_allowed, vec!["payment".to_string()]); + assert!(r.dropped_unknown.is_empty()); } #[test] - fn skills_for_route_drops_unknown_skill_names() { - let catalog = vec![skill("pdf-processing", "extract")]; - let routes = vec![route( - "code review", - Some(vec!["pdf-processing", "ghost-skill"]), - )]; - let resolved = skills_for_route(&catalog, &routes, "code review"); - assert_eq!(resolved.len(), 1); - assert_eq!(resolved[0].name, "pdf-processing"); + fn resolve_for_route_drops_hallucinated_skill_into_unknown() { + let catalog = vec![skill("pdf", "")]; + let allow = vec!["pdf".to_string(), "imaginary".to_string()]; + let selected = vec!["pdf".to_string(), "imaginary".to_string()]; + let r = resolve_for_route(&catalog, &allow, &selected); + assert_eq!(r.activated.len(), 1); + assert_eq!(r.activated[0].name, "pdf"); + assert!(r.dropped_not_allowed.is_empty()); + assert_eq!(r.dropped_unknown, vec!["imaginary".to_string()]); } + #[test] + fn resolve_for_route_deduplicates_repeats() { + let catalog = vec![skill("pdf", "")]; + let allow = vec!["pdf".to_string()]; + let selected = vec!["pdf".to_string(), "pdf".to_string(), "pdf".to_string()]; + let r = resolve_for_route(&catalog, &allow, &selected); + assert_eq!(r.activated.len(), 1); + } + + // --- resolve_selected_skills (skills-only path) --- + #[test] fn resolve_selected_skills_drops_unknown_and_dedupes() { let catalog = vec![ @@ -187,6 +357,8 @@ mod tests { assert_eq!(resolved[1].name, "pdf-processing"); } + // --- augment_system_prompt_with_skills --- + #[test] fn augment_passthrough_with_no_skills() { let augmented = augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[]); @@ -212,4 +384,32 @@ mod tests { assert!(augmented.contains("").unwrap(); + let body_section_start = augmented.find(">\n").unwrap() + 2; + let body_len = body_section_end - body_section_start; + assert!(body_len <= MAX_SKILL_BODY_BYTES + 64); + } }