Merge 5e8d27fd3c into f3d6ea41ad

2026-06-29 15:49:40 +02:00 · 2026-06-03 10:10:11 -07:00 · 2026-06-03 10:10:11 -07:00 · a24710dfaf
commit a24710dfaf
parent f3d6ea41ad 5e8d27fd3c
24 changed files with 3278 additions and 98 deletions
--- a/crates/brightstaff/src/handlers/agents/selector.rs
+++ b/crates/brightstaff/src/handlers/agents/selector.rs
@ -132,11 +132,15 @@ impl AgentSelector {
            .determine_orchestration(messages, Some(usage_preferences), request_id)
            .await
        {
-            Ok(Some(routes)) => {
-                debug!(count = routes.len(), "determined agents via orchestration");
+            Ok(Some(selection)) => {
+                debug!(
+                    count = selection.routes.len(),
+                    skill_count = selection.skills.len(),
+                    "determined agents via orchestration"
+                );
                let mut selected_agents = Vec::new();

-                for (route_name, agent_name) in routes {
+                for (route_name, agent_name) in selection.routes {
                    debug!(route = %route_name, agent = %agent_name, "processing route");
                    let selected_agent = agents
                        .iter()
--- a/crates/brightstaff/src/handlers/llm/mod.rs
+++ b/crates/brightstaff/src/handlers/llm/mod.rs
@ -37,7 +37,7 @@ use crate::tracing::{
    collect_custom_trace_attributes, llm as tracing_llm, operation_component,
    plano as tracing_plano, set_service_name,
 };
-use model_selection::router_chat_get_upstream_model;
+use model_selection::{inject_activated_skills_into_request, router_chat_get_upstream_model};

 const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";

@ -282,26 +282,16 @@ async fn llm_chat_inner(
        Err(response) => return Ok(response),
    };

-    // Serialize request for upstream BEFORE router consumes it
-    let client_request_bytes_for_upstream: Bytes =
-        match ProviderRequestType::to_bytes(&client_request) {
-            Ok(bytes) => bytes.into(),
-            Err(err) => {
-                warn!(error = %err, "failed to serialize request for upstream");
-                let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
-                *r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
-                return Ok(r);
-            }
-        };
-
-    // --- Phase 3: Route the request (or use pinned model from session cache) ---
-    let resolved_model = if let Some(cached_model) = pinned_model {
+    // Route the request (or use pinned model from session cache) BEFORE
+    // serializing for upstream — skill body injection happens here, so the
+    // upstream bytes must be produced after routing returns.
+    let (resolved_model, activated_skills) = if let Some(cached_model) = pinned_model {
        info!(
            session_id = %session_id.as_deref().unwrap_or(""),
            model = %cached_model,
            "using pinned routing decision from cache"
        );
-        cached_model
+        (cached_model, Vec::new())
    } else {
        let routing_span = info_span!(
            "routing",
@ -313,11 +303,16 @@ async fn llm_chat_inner(
            route.selected_model = tracing::field::Empty,
            routing.determination_ms = tracing::field::Empty,
        );
+        // The router consumes the request (it converts it to OpenAI format
+        // internally to extract conversation messages). Clone so we can
+        // still mutate the original below when Plano-Orchestrator activates
+        // any Agent Skills.
+        let request_for_routing = client_request.clone();
        let routing_result = match async {
            set_service_name(operation_component::ROUTING);
            router_chat_get_upstream_model(
                Arc::clone(&state.orchestrator_service),
-                client_request,
+                request_for_routing,
                &request_path,
                &request_id,
                inline_routing_preferences,
@ -335,8 +330,11 @@ async fn llm_chat_inner(
            }
        };

-        let (router_selected_model, route_name) =
-            (routing_result.model_name, routing_result.route_name);
+        let (router_selected_model, route_name, activated) = (
+            routing_result.model_name,
+            routing_result.route_name,
+            routing_result.activated_skills,
+        );
        let model = if router_selected_model != "none" {
            router_selected_model
        } else {
@ -362,10 +360,34 @@ async fn llm_chat_inner(
                .await;
        }

-        model
+        (model, activated)
    };
    tracing::Span::current().record(tracing_llm::MODEL_NAME, resolved_model.as_str());

+    // If Plano-Orchestrator activated any Agent Skills for this route, inject
+    // their SKILL.md bodies into the system prompt before we hand the bytes
+    // off to the upstream provider.
+    if !activated_skills.is_empty() {
+        info!(
+            count = activated_skills.len(),
+            skills = ?activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>(),
+            "injecting activated Agent Skills into upstream system prompt"
+        );
+        inject_activated_skills_into_request(&mut client_request, &activated_skills);
+    }
+
+    // Serialize request for upstream AFTER potential skill injection.
+    let client_request_bytes_for_upstream: Bytes =
+        match ProviderRequestType::to_bytes(&client_request) {
+            Ok(bytes) => bytes.into(),
+            Err(err) => {
+                warn!(error = %err, "failed to serialize request for upstream");
+                let mut r = Response::new(full(format!("Failed to serialize request: {}", err)));
+                *r.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
+                return Ok(r);
+            }
+        };
+
    // --- Phase 4: Forward to upstream and stream back ---
    send_upstream(
        &state.http_client,
--- a/crates/brightstaff/src/handlers/llm/model_selection.rs
+++ b/crates/brightstaff/src/handlers/llm/model_selection.rs
@ -1,5 +1,9 @@
-use common::configuration::TopLevelRoutingPreference;
+use common::configuration::{SkillRef, TopLevelRoutingPreference};
+use common::skills_runtime::augment_system_prompt_with_skills;
+use hermesllm::apis::openai::{Message, MessageContent, Role};
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
+use hermesllm::providers::request::ProviderRequest;
+use hermesllm::transforms::lib::ExtractText;
 use hermesllm::ProviderRequestType;
 use hyper::StatusCode;
 use std::sync::Arc;
@ -29,6 +33,10 @@ pub struct RoutingResult {
    /// Full ranked list — use subsequent entries as fallbacks on 429/5xx.
    pub models: Vec<String>,
    pub route_name: Option<String>,
+    /// Agent Skills activated by Plano-Orchestrator for this request.
+    /// Their `body` field (the SKILL.md content) is prepended to the
+    /// upstream system prompt by the caller in `send_upstream`.
+    pub activated_skills: Vec<SkillRef>,
 }

 pub struct RoutingError {
@ -128,8 +136,36 @@ pub async fn router_chat_get_upstream_model(

    match routing_result {
        Ok(route) => match route {
-            Some((route_name, ranked_models)) => {
-                let model_name = ranked_models.first().cloned().unwrap_or_default();
+            Some(decision) => {
+                // Skills-only decision (no route, no models) -> fall through
+                // to the "none" sentinel so the original / aliased model is
+                // used, but propagate activated_skills so they still get
+                // injected. Documented at docs/source/resources/skills.rst.
+                if decision.route_name.is_none() && decision.models.is_empty() {
+                    current_span.record("route.selected_model", "none");
+                    info!(
+                        skills = ?decision
+                            .activated_skills
+                            .iter()
+                            .map(|s| s.name.as_str())
+                            .collect::<Vec<_>>(),
+                        "no route determined; activating skills against default model"
+                    );
+                    bs_metrics::record_router_decision(
+                        route_label,
+                        "none",
+                        true,
+                        determination_elapsed,
+                    );
+                    return Ok(RoutingResult {
+                        model_name: "none".to_string(),
+                        models: vec!["none".to_string()],
+                        route_name: None,
+                        activated_skills: decision.activated_skills,
+                    });
+                }
+
+                let model_name = decision.models.first().cloned().unwrap_or_default();
                current_span.record("route.selected_model", model_name.as_str());
                bs_metrics::record_router_decision(
                    route_label,
@ -139,8 +175,9 @@ pub async fn router_chat_get_upstream_model(
                );
                Ok(RoutingResult {
                    model_name,
-                    models: ranked_models,
-                    route_name: Some(route_name),
+                    models: decision.models,
+                    route_name: decision.route_name,
+                    activated_skills: decision.activated_skills,
                })
            }
            None => {
@ -159,6 +196,7 @@ pub async fn router_chat_get_upstream_model(
                    model_name: "none".to_string(),
                    models: vec!["none".to_string()],
                    route_name: None,
+                    activated_skills: Vec::new(),
                })
            }
        },
@ -172,3 +210,250 @@ pub async fn router_chat_get_upstream_model(
        }
    }
 }
+
+/// Prepend the bodies of `activated_skills` to the system prompt of the
+/// upstream request so the chosen LLM has access to each skill's instructions.
+/// Works across every provider variant by going through the OpenAI message
+/// shape (`get_messages` / `set_messages`).
+///
+/// # Behavior contract
+///
+/// * **No-op** when `activated_skills` is empty.
+/// * **Augments the first system message in place** when one is present at
+///   any position in `messages` (typically index 0, but we look for the
+///   first `Role::System` rather than assuming). Subsequent system messages
+///   (rare but legal for some providers) are left untouched. We pick "first"
+///   so the skill content appears as early in the prompt as possible —
+///   models weight earlier system content more heavily and an Anthropic
+///   tools+system combo is conventionally a single leading block.
+/// * **Inserts a new leading system message** at index 0 when no system
+///   message exists in the request.
+/// * **Flattens `MessageContent::Parts` system content to a single
+///   `MessageContent::Text`** when extracting the base prompt. This is
+///   intentional: every supported upstream API accepts text in system
+///   messages, and the alternative — preserving each `ContentPart` and
+///   appending a new text part — fails on providers that disallow
+///   multi-part system content. The trade-off is that non-text system parts
+///   (e.g. images attached to a system message, which no production
+///   provider supports anyway) are dropped on the floor. Verified by
+///   `flattens_parts_system_content` below.
+pub fn inject_activated_skills_into_request(
+    client_request: &mut ProviderRequestType,
+    activated_skills: &[SkillRef],
+) {
+    if activated_skills.is_empty() {
+        return;
+    }
+
+    let skill_refs: Vec<&SkillRef> = activated_skills.iter().collect();
+
+    let mut messages = client_request.get_messages();
+
+    let (system_idx, base_text) = match messages.iter().position(|m| m.role == Role::System) {
+        Some(idx) => {
+            let text = messages[idx]
+                .content
+                .as_ref()
+                .map(|c| c.extract_text())
+                .unwrap_or_default();
+            (Some(idx), Some(text))
+        }
+        None => (None, None),
+    };
+
+    let augmented = augment_system_prompt_with_skills(base_text, &skill_refs);
+    let Some(augmented_text) = augmented else {
+        return;
+    };
+
+    match system_idx {
+        Some(idx) => {
+            messages[idx].content = Some(MessageContent::Text(augmented_text));
+        }
+        None => {
+            messages.insert(
+                0,
+                Message {
+                    role: Role::System,
+                    content: Some(MessageContent::Text(augmented_text)),
+                    name: None,
+                    tool_calls: None,
+                    tool_call_id: None,
+                },
+            );
+        }
+    }
+
+    client_request.set_messages(&messages);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use hermesllm::apis::openai::{ChatCompletionsRequest, ContentPart};
+
+    fn req_with_messages(msgs: Vec<Message>) -> ProviderRequestType {
+        ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest {
+            model: "test".to_string(),
+            messages: msgs,
+            ..Default::default()
+        })
+    }
+
+    fn user_msg(text: &str) -> Message {
+        Message {
+            role: Role::User,
+            content: Some(MessageContent::Text(text.to_string())),
+            name: None,
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+
+    fn system_msg(text: &str) -> Message {
+        Message {
+            role: Role::System,
+            content: Some(MessageContent::Text(text.to_string())),
+            name: None,
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+
+    fn skill(name: &str, body: &str) -> SkillRef {
+        SkillRef {
+            name: name.to_string(),
+            description: format!("desc for {name}"),
+            path: None,
+            base_dir: None,
+            body: Some(body.to_string()),
+            scope: Some("project".to_string()),
+            compatibility: None,
+            license: None,
+            metadata: None,
+            allowed_tools: None,
+        }
+    }
+
+    fn first_system_text(req: &ProviderRequestType) -> String {
+        req.get_messages()
+            .iter()
+            .find(|m| m.role == Role::System)
+            .and_then(|m| m.content.as_ref())
+            .map(|c| c.extract_text())
+            .unwrap_or_default()
+    }
+
+    #[test]
+    fn injects_new_system_message_when_none_present() {
+        let mut req = req_with_messages(vec![user_msg("hi")]);
+        inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
+        let messages = req.get_messages();
+        assert_eq!(messages.len(), 2);
+        assert_eq!(messages[0].role, Role::System);
+        let txt = first_system_text(&req);
+        assert!(txt.contains("<skill_content name=\"pdf\""));
+        assert!(txt.contains("process pdfs"));
+    }
+
+    #[test]
+    fn augments_existing_system_message_in_place_preserving_user_message() {
+        let mut req = req_with_messages(vec![system_msg("you are helpful"), user_msg("hi")]);
+        inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
+        let messages = req.get_messages();
+        assert_eq!(messages.len(), 2);
+        let txt = first_system_text(&req);
+        assert!(txt.starts_with("you are helpful"));
+        assert!(txt.contains("<skill_content name=\"pdf\""));
+        assert_eq!(messages[1].role, Role::User);
+    }
+
+    #[test]
+    fn noop_when_no_skills_activated() {
+        let mut req = req_with_messages(vec![system_msg("base"), user_msg("hi")]);
+        let before = req.get_messages();
+        inject_activated_skills_into_request(&mut req, &[]);
+        let after = req.get_messages();
+        assert_eq!(after.len(), before.len());
+        assert_eq!(first_system_text(&req), "base");
+    }
+
+    #[test]
+    fn augments_only_the_first_system_message() {
+        // Two system messages (rare in practice). Only the first one is
+        // augmented; the trailing one is left untouched. Documented contract.
+        let mut req = req_with_messages(vec![
+            system_msg("primary"),
+            system_msg("secondary"),
+            user_msg("hi"),
+        ]);
+        inject_activated_skills_into_request(&mut req, &[skill("pdf", "process pdfs")]);
+        let messages = req.get_messages();
+        assert!(messages[0]
+            .content
+            .as_ref()
+            .unwrap()
+            .extract_text()
+            .contains("primary"));
+        assert!(messages[0]
+            .content
+            .as_ref()
+            .unwrap()
+            .extract_text()
+            .contains("<skill_content"));
+        assert_eq!(
+            messages[1].content.as_ref().unwrap().extract_text(),
+            "secondary"
+        );
+    }
+
+    #[test]
+    fn flattens_parts_system_content() {
+        // Documented behavior: `MessageContent::Parts` system content is
+        // extracted to plain text via ExtractText and re-emitted as
+        // `MessageContent::Text`. Non-text parts (e.g. images) are dropped
+        // — no production provider ships images in a system message.
+        let parts_system = Message {
+            role: Role::System,
+            content: Some(MessageContent::Parts(vec![
+                ContentPart::Text {
+                    text: "be brief".to_string(),
+                },
+                ContentPart::Text {
+                    text: " and polite".to_string(),
+                },
+            ])),
+            name: None,
+            tool_calls: None,
+            tool_call_id: None,
+        };
+        let mut req = req_with_messages(vec![parts_system, user_msg("hi")]);
+        inject_activated_skills_into_request(&mut req, &[skill("pdf", "body")]);
+        let messages = req.get_messages();
+        let system = &messages[0];
+        match system.content.as_ref().unwrap() {
+            MessageContent::Text(t) => {
+                assert!(t.contains("be brief"));
+                assert!(t.contains(" and polite"));
+                assert!(t.contains("<skill_content name=\"pdf\""));
+            }
+            MessageContent::Parts(_) => panic!("expected flattened text, got Parts"),
+        }
+    }
+
+    #[test]
+    fn injects_in_orchestrator_order_for_multiple_skills() {
+        let mut req = req_with_messages(vec![user_msg("hi")]);
+        inject_activated_skills_into_request(
+            &mut req,
+            &[skill("first", "alpha-body"), skill("second", "beta-body")],
+        );
+        let txt = first_system_text(&req);
+        let first_pos = txt.find("alpha-body").expect("first skill body present");
+        let second_pos = txt.find("beta-body").expect("second skill body present");
+        assert!(
+            first_pos < second_pos,
+            "skills should appear in the order they were activated"
+        );
+    }
+}
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@ -308,11 +308,12 @@ async fn init_app_state(
        .orchestrator_model_context_length
        .unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);

-    let orchestrator_service = Arc::new(OrchestratorService::with_routing(
+    let orchestrator_service = Arc::new(OrchestratorService::with_routing_and_skills(
        format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
        orchestrator_model_name,
        orchestrator_llm_provider,
        config.routing_preferences.clone(),
+        config.skills.clone(),
        metrics_service,
        session_ttl_seconds,
        session_cache,
--- a/crates/brightstaff/src/router/orchestrator.rs
+++ b/crates/brightstaff/src/router/orchestrator.rs
@ -1,19 +1,22 @@
 use std::{borrow::Cow, collections::HashMap, sync::Arc, time::Duration};

 use common::{
-    configuration::{AgentUsagePreference, OrchestrationPreference, TopLevelRoutingPreference},
+    configuration::{
+        AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference,
+    },
    consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
+    skills_runtime::{referenced_skills_catalog, resolve_for_route, resolve_selected_skills},
 };
 use hermesllm::apis::openai::Message;
 use hyper::header;
 use opentelemetry::global;
 use opentelemetry_http::HeaderInjector;
 use thiserror::Error;
-use tracing::{debug, info};
+use tracing::{debug, info, warn};

 use super::http::{self, post_and_extract_content};
 use super::model_metrics::ModelMetricsService;
-use super::orchestrator_model::OrchestratorModel;
+use super::orchestrator_model::{OrchestratorModel, OrchestratorSelection};

 use crate::metrics as bs_metrics;
 use crate::metrics::labels as metric_labels;
@ -30,12 +33,40 @@ pub struct OrchestratorService {
    orchestrator_model: Arc<dyn OrchestratorModel>,
    orchestrator_provider_name: String,
    top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
+    /// Agent Skills catalog (deduplicated by name) attached to any
+    /// `routing_preferences[].skills` list. Empty when no route has skills.
+    skills_catalog: Vec<SkillRef>,
    metrics_service: Option<Arc<ModelMetricsService>>,
    session_cache: Option<Arc<dyn SessionCache>>,
    session_ttl: Duration,
    tenant_header: Option<String>,
 }

+/// Result of `determine_route`: which route was picked (if any), the
+/// ranked candidate models for that route, and the Agent Skill bodies the
+/// orchestrator chose to activate alongside it.
+///
+/// Two valid shapes:
+///
+/// * **Route + skills (typical):** `route_name = Some(...)`, `models`
+///   non-empty, `activated_skills` may be non-empty. Skills are resolved
+///   against `routing_preferences[<route>].skills`, so picks that aren't
+///   allow-listed for the route are dropped with a `warn!`.
+/// * **Skills-only:** `route_name = None`, `models` empty,
+///   `activated_skills` non-empty. The orchestrator decided no route
+///   needed to change but the user's intent matches one or more skills.
+///   Per `docs/source/resources/skills.rst`, the request falls back to the
+///   originally-requested model and the skill bodies are injected the
+///   same way. Allow-list filtering uses the catalog union (effectively
+///   the catalog itself, which is pre-filtered to skills referenced by
+///   some route).
+#[derive(Debug, Clone, Default)]
+pub struct RouteDecision {
+    pub route_name: Option<String>,
+    pub models: Vec<String>,
+    pub activated_skills: Vec<SkillRef>,
+}
+
 #[derive(Debug, Error)]
 pub enum OrchestrationError {
    #[error(transparent)]
@ -66,6 +97,7 @@ impl OrchestratorService {
            orchestrator_model,
            orchestrator_provider_name,
            top_level_preferences: HashMap::new(),
+            skills_catalog: Vec::new(),
            metrics_service: None,
            session_cache: None,
            session_ttl: Duration::from_secs(DEFAULT_SESSION_TTL_SECONDS),
@ -84,14 +116,53 @@ impl OrchestratorService {
        session_cache: Arc<dyn SessionCache>,
        tenant_header: Option<String>,
        max_token_length: usize,
+    ) -> Self {
+        Self::with_routing_and_skills(
+            orchestrator_url,
+            orchestration_model_name,
+            orchestrator_provider_name,
+            top_level_prefs,
+            None,
+            metrics_service,
+            session_ttl_seconds,
+            session_cache,
+            tenant_header,
+            max_token_length,
+        )
+    }
+
+    /// Like `with_routing`, but also seeds the orchestrator with a catalog of
+    /// Agent Skills referenced by `routing_preferences[].skills`. The
+    /// orchestrator gets a `<skills>` block in its system prompt and may
+    /// select zero or more skills alongside the picked route; this enables
+    /// the LLM handler to inject the chosen SKILL.md bodies into the
+    /// upstream request.
+    #[allow(clippy::too_many_arguments)]
+    pub fn with_routing_and_skills(
+        orchestrator_url: String,
+        orchestration_model_name: String,
+        orchestrator_provider_name: String,
+        top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
+        skills_catalog: Option<Vec<SkillRef>>,
+        metrics_service: Option<Arc<ModelMetricsService>>,
+        session_ttl_seconds: Option<u64>,
+        session_cache: Arc<dyn SessionCache>,
+        tenant_header: Option<String>,
+        max_token_length: usize,
    ) -> Self {
        let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
            .map_or_else(HashMap::new, |prefs| {
                prefs.into_iter().map(|p| (p.name.clone(), p)).collect()
            });

-        let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
+        let skills_catalog = referenced_skills_catalog(
+            skills_catalog.as_deref().unwrap_or(&[]),
+            &top_level_preferences,
+        );
+
+        let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::with_skills(
            HashMap::new(),
+            skills_catalog.clone(),
            orchestration_model_name,
            max_token_length,
        ));
@ -105,6 +176,7 @@ impl OrchestratorService {
            orchestrator_model,
            orchestrator_provider_name,
            top_level_preferences,
+            skills_catalog,
            metrics_service,
            session_cache: Some(session_cache),
            session_ttl,
@ -170,7 +242,7 @@ impl OrchestratorService {
        messages: &[Message],
        inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
        request_id: &str,
-    ) -> Result<Option<(String, Vec<String>)>> {
+    ) -> Result<Option<RouteDecision>> {
        if messages.is_empty() {
            return Ok(None);
        }
@ -206,9 +278,13 @@ impl OrchestratorService {
            )
            .await?;

-        let result = if let Some(ref routes) = orchestration_result {
-            if routes.len() > 1 {
-                let all_routes: Vec<&str> = routes.iter().map(|(name, _)| name.as_str()).collect();
+        let result = if let Some(ref selection) = orchestration_result {
+            if selection.routes.len() > 1 {
+                let all_routes: Vec<&str> = selection
+                    .routes
+                    .iter()
+                    .map(|(name, _)| name.as_str())
+                    .collect();
                info!(
                    routes = ?all_routes,
                    using = %all_routes.first().unwrap_or(&"none"),
@ -216,7 +292,8 @@ impl OrchestratorService {
                );
            }

-            if let Some((route_name, _)) = routes.first() {
+            if let Some((route_name, _)) = selection.routes.first() {
+                // Route + (optional) skills path.
                let top_pref = inline_top_map
                    .as_ref()
                    .and_then(|m| m.get(route_name))
@ -227,10 +304,43 @@ impl OrchestratorService {
                        Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await,
                        None => pref.models.clone(),
                    };
-                    Some((route_name.clone(), ranked))
+                    let resolution = resolve_for_route(
+                        &self.skills_catalog,
+                        pref.skills.as_deref().unwrap_or(&[]),
+                        &selection.skills,
+                    );
+                    log_skill_drops(route_name, &resolution);
+                    let activated_skills: Vec<SkillRef> =
+                        resolution.activated.into_iter().cloned().collect();
+                    Some(RouteDecision {
+                        route_name: Some(route_name.clone()),
+                        models: ranked,
+                        activated_skills,
+                    })
                } else {
                    None
                }
+            } else if !selection.skills.is_empty() {
+                // Skills-only path: orchestrator picked no route but flagged
+                // skills. Per the documented contract the request still goes
+                // through with the originally-requested model and the skill
+                // bodies are injected. The catalog itself is the effective
+                // allow-list (it's already the union across every route's
+                // allow-list, so anything in it was deemed safe to expose).
+                let activated: Vec<SkillRef> =
+                    resolve_selected_skills(&self.skills_catalog, &selection.skills)
+                        .into_iter()
+                        .cloned()
+                        .collect();
+                if activated.is_empty() {
+                    None
+                } else {
+                    Some(RouteDecision {
+                        route_name: None,
+                        models: Vec::new(),
+                        activated_skills: activated,
+                    })
+                }
            } else {
                None
            }
@ -239,7 +349,7 @@ impl OrchestratorService {
        };

        info!(
-            selected_model = ?result,
+            selected_route = ?result.as_ref().map(|r| (&r.route_name, r.models.first(), r.activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>())),
            "plano-orchestrator determined route"
        );

@ -253,7 +363,7 @@ impl OrchestratorService {
        messages: &[Message],
        usage_preferences: Option<Vec<AgentUsagePreference>>,
        request_id: Option<String>,
-    ) -> Result<Option<Vec<(String, String)>>> {
+    ) -> Result<Option<OrchestratorSelection>> {
        if messages.is_empty() {
            return Ok(None);
        }
@ -328,6 +438,30 @@ impl OrchestratorService {
    }
 }

+/// Emit `warn!` for any skill names the orchestrator selected but the
+/// resolver dropped. Surfacing these is critical for debuggability — a
+/// silently-dropped skill is hard to diagnose, and the most common causes
+/// (forgetting to add a skill to a route's allow-list, or the orchestrator
+/// hallucinating a name) are both fixable once visible.
+fn log_skill_drops(route_name: &str, resolution: &common::skills_runtime::SkillResolution<'_>) {
+    if !resolution.dropped_not_allowed.is_empty() {
+        warn!(
+            route = %route_name,
+            skills = ?resolution.dropped_not_allowed,
+            "orchestrator selected Agent Skills that are not on this route's allow-list; \
+             dropping (add them to routing_preferences[].skills if you want this route to use them)"
+        );
+    }
+    if !resolution.dropped_unknown.is_empty() {
+        warn!(
+            route = %route_name,
+            skills = ?resolution.dropped_unknown,
+            "orchestrator selected Agent Skills that are not in the runtime catalog \
+             (likely hallucinated or removed)"
+        );
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -410,6 +544,50 @@ mod tests {
        assert!(svc.get_cached_route("s3", None).await.is_some());
    }

+    // ---- RouteDecision construction ----
+
+    fn skill_ref(name: &str) -> SkillRef {
+        SkillRef {
+            name: name.to_string(),
+            description: format!("desc for {name}"),
+            path: None,
+            base_dir: None,
+            body: Some(format!("body for {name}")),
+            scope: Some("project".to_string()),
+            compatibility: None,
+            license: None,
+            metadata: None,
+            allowed_tools: None,
+        }
+    }
+
+    #[test]
+    fn route_decision_holds_optional_route_name_for_skills_only_path() {
+        // Regression guard for the docs promise at skills.rst:153-155: a
+        // skills-only decision must be representable, with no route_name and
+        // empty models, so the LLM handler falls back to the original model.
+        let decision = RouteDecision {
+            route_name: None,
+            models: Vec::new(),
+            activated_skills: vec![skill_ref("pdf")],
+        };
+        assert!(decision.route_name.is_none());
+        assert!(decision.models.is_empty());
+        assert_eq!(decision.activated_skills.len(), 1);
+    }
+
+    #[test]
+    fn log_skill_drops_does_not_panic_on_empty_resolution() {
+        // The logger is fire-and-forget. We can't easily assert on the
+        // emitted warns here without setting up a tracing subscriber, so the
+        // contract under test is: empty resolutions are silent (no warn
+        // attempt). Confidence in the warn paths comes from
+        // common::skills_runtime tests for resolve_for_route, which is the
+        // function whose dropped_* lists drive this logger.
+        let empty = common::skills_runtime::SkillResolution::default();
+        log_skill_drops("any", &empty);
+    }
+
    #[tokio::test]
    async fn test_cache_update_existing_session_does_not_evict() {
        let svc = make_orchestrator_service(600, 2);
--- a/crates/brightstaff/src/router/orchestrator_model.rs
+++ b/crates/brightstaff/src/router/orchestrator_model.rs
@ -10,20 +10,37 @@ pub enum OrchestratorModelError {

 pub type Result<T> = std::result::Result<T, OrchestratorModelError>;

+/// The result of running Plano-Orchestrator over a conversation: zero or more
+/// selected routes (each mapped to its upstream model name) plus zero or more
+/// selected Agent Skills. Skills are filtered down by the consumer to the
+/// catalog defined under `routing_preferences[].skills` for the chosen route.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct OrchestratorSelection {
+    pub routes: Vec<(String, String)>,
+    pub skills: Vec<String>,
+}
+
+impl OrchestratorSelection {
+    pub fn is_empty(&self) -> bool {
+        self.routes.is_empty() && self.skills.is_empty()
+    }
+}
+
 /// OrchestratorModel trait for handling orchestration requests.
-/// Returns multiple routes as the model output format is:
-/// {"route": ["route_name_1", "route_name_2", ...]}
+/// Returns multiple routes and skills as the model output format is:
+/// {"route": ["route_name_1", ...], "skills": ["skill_name_1", ...]}
 pub trait OrchestratorModel: Send + Sync {
    fn generate_request(
        &self,
        messages: &[Message],
        usage_preferences: &Option<Vec<AgentUsagePreference>>,
    ) -> ChatCompletionsRequest;
-    /// Returns a vector of (route_name, model_name) tuples for all matched routes.
+    /// Parses the orchestrator's raw model output into selected routes (each
+    /// mapped to a model) and selected skill names.
    fn parse_response(
        &self,
        content: &str,
        usage_preferences: &Option<Vec<AgentUsagePreference>>,
-    ) -> Result<Option<Vec<(String, String)>>>;
+    ) -> Result<Option<OrchestratorSelection>>;
    fn get_model_name(&self) -> String;
 }
--- a/crates/brightstaff/src/router/orchestrator_model_v1.rs
+++ b/crates/brightstaff/src/router/orchestrator_model_v1.rs
@ -1,12 +1,12 @@
 use std::collections::HashMap;

-use common::configuration::{AgentUsagePreference, OrchestrationPreference};
+use common::configuration::{AgentUsagePreference, OrchestrationPreference, SkillRef};
 use hermesllm::apis::openai::{ChatCompletionsRequest, Message, MessageContent, Role};
 use hermesllm::transforms::lib::ExtractText;
 use serde::{ser::Serialize as SerializeTrait, Deserialize, Serialize};
 use tracing::{debug, warn};

-use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
+use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError, OrchestratorSelection};

 pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model

@ -138,10 +138,47 @@ Return your answer strictly in JSON as follows:
 If no routes are needed, return an empty list for `route`.
 "#;

+/// System prompt used when one or more Agent Skills are attached to candidate
+/// routes. Adds a `<skills>` block alongside `<routes>` and asks the model to
+/// also pick zero or more skills that should be loaded into the downstream
+/// LLM's system prompt.
+pub const ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS: &str = r#"
+You are a helpful assistant that selects the most suitable routes and Agent Skills based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{routes}
+</routes>
+
+You are provided with a list of available Agent Skills enclosed within <skills></skills> XML tags:
+<skills>
+{skills}
+</skills>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+{conversation}
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Independently compare it against the available skills; pick the skills whose descriptions match what the user is trying to do. Skills can be combined with any route. Activating a skill loads detailed instructions into the next response's system prompt.
+4. Respond only with exact names from <routes> and <skills>.
+5. If no routes or skills can help, return empty lists.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "..."], "skills": ["skill_name_1", "..."]}}
+Use empty lists for `route` and/or `skills` when nothing applies.
+"#;
+
 pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
 pub struct OrchestratorModelV1 {
    agent_orchestration_json_str: String,
    agent_orchestration_to_model_map: HashMap<String, String>,
+    /// Pre-rendered `<skills>` block (one JSON entry per skill, name +
+    /// description). Empty when no skills are attached to any route.
+    skills_catalog_json_str: String,
    orchestration_model: String,
    max_token_length: usize,
 }
@ -151,10 +188,27 @@ impl OrchestratorModelV1 {
        agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
        orchestration_model: String,
        max_token_length: usize,
+    ) -> Self {
+        Self::with_skills(
+            agent_orchestrations,
+            Vec::new(),
+            orchestration_model,
+            max_token_length,
+        )
+    }
+
+    /// Like `new`, but additionally seeds the orchestrator with an Agent
+    /// Skills catalog. When `skills_catalog` is empty the orchestrator uses
+    /// the routes-only system prompt; otherwise it asks the model to also
+    /// pick zero or more skills from the catalog.
+    pub fn with_skills(
+        agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
+        skills_catalog: Vec<SkillRef>,
+        orchestration_model: String,
+        max_token_length: usize,
    ) -> Self {
        let agent_orchestration_values: Vec<OrchestrationPreference> =
            agent_orchestrations.values().flatten().cloned().collect();
-        // Format routes: each route as JSON on its own line with standard spacing
        let agent_orchestration_json_str = agent_orchestration_values
            .iter()
            .map(to_spaced_json)
@ -165,19 +219,48 @@ impl OrchestratorModelV1 {
            .flat_map(|(model, prefs)| prefs.iter().map(|pref| (pref.name.clone(), model.clone())))
            .collect();

+        let skills_catalog_json_str = render_skills_catalog(&skills_catalog);
+
        OrchestratorModelV1 {
            orchestration_model,
            max_token_length,
            agent_orchestration_json_str,
            agent_orchestration_to_model_map,
+            skills_catalog_json_str,
        }
    }
 }

+/// JSON shape suitable for the `<skills>` block in the orchestrator prompt:
+/// `{"name": "...", "description": "..."}`. Only metadata that helps the
+/// orchestrator pick a skill is included; the full SKILL.md body is injected
+/// separately, after a skill has been selected.
+#[derive(Debug, Clone, Serialize)]
+struct SkillCatalogEntry<'a> {
+    name: &'a str,
+    description: &'a str,
+}
+
+fn render_skills_catalog(skills: &[SkillRef]) -> String {
+    skills
+        .iter()
+        .map(|s| SkillCatalogEntry {
+            name: &s.name,
+            description: s.catalog_description(),
+        })
+        .map(|entry| to_spaced_json(&entry))
+        .collect::<Vec<String>>()
+        .join("\n")
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 struct AgentOrchestratorResponse {
-    /// The route field now expects an array of route names: ["route_name_1", "route_name_2", ...]
+    /// The route field expects an array of route names: ["route_name_1", "route_name_2", ...].
    pub route: Option<Vec<String>>,
+    /// Optional array of Agent Skill names the orchestrator chose to activate.
+    /// Absent or empty when no skills should be loaded for this turn.
+    #[serde(default)]
+    pub skills: Option<Vec<String>>,
 }

 const TOKEN_LENGTH_DIVISOR: usize = 4; // Approximate token length divisor for UTF-8 characters
@ -209,7 +292,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
        // Ensure the conversation does not exceed the configured token budget.
        // We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
        // avoid running a real tokenizer on the hot path.
-        let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
+        let template_len = if self.skills_catalog_json_str.is_empty() {
+            ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
+        } else {
+            ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS.len()
+                + self.skills_catalog_json_str.len()
+        };
+        let mut token_count = template_len / TOKEN_LENGTH_DIVISOR;
        let mut selected_messages_list_reversed: Vec<Message> = vec![];
        for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
            let message_text = message.content.extract_text();
@ -289,14 +378,16 @@ impl OrchestratorModel for OrchestratorModelV1 {
        // Generate the orchestrator request message based on the usage preferences.
        // If preferences are passed in request then we use them;
        // Otherwise, we use the default orchestration modelpreferences.
-        let orchestrator_message =
-            match convert_to_orchestrator_preferences(usage_preferences_from_request) {
-                Some(prefs) => generate_orchestrator_message(&prefs, &selected_conversation_list),
-                None => generate_orchestrator_message(
-                    &self.agent_orchestration_json_str,
-                    &selected_conversation_list,
-                ),
-            };
+        let routes_block = match convert_to_orchestrator_preferences(usage_preferences_from_request)
+        {
+            Some(prefs) => prefs,
+            None => self.agent_orchestration_json_str.clone(),
+        };
+        let orchestrator_message = generate_orchestrator_message(
+            &routes_block,
+            &self.skills_catalog_json_str,
+            &selected_conversation_list,
+        );

        ChatCompletionsRequest {
            model: self.orchestration_model.clone(),
@ -316,7 +407,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
        &self,
        content: &str,
        usage_preferences: &Option<Vec<AgentUsagePreference>>,
-    ) -> Result<Option<Vec<(String, String)>>> {
+    ) -> Result<Option<OrchestratorSelection>> {
        if content.is_empty() {
            return Ok(None);
        }
@ -326,20 +417,21 @@ impl OrchestratorModel for OrchestratorModelV1 {

        let selected_routes = orchestrator_response.route.unwrap_or_default();

-        // Filter out empty routes
        let valid_routes: Vec<String> = selected_routes
            .into_iter()
            .filter(|route| !route.is_empty())
            .collect();

-        if valid_routes.is_empty() {
-            return Ok(None);
-        }
+        let selected_skills: Vec<String> = orchestrator_response
+            .skills
+            .unwrap_or_default()
+            .into_iter()
+            .filter(|s| !s.is_empty())
+            .collect();

-        let mut result: Vec<(String, String)> = Vec::new();
+        let mut routes: Vec<(String, String)> = Vec::new();

        if let Some(usage_preferences) = usage_preferences {
-            // If usage preferences are defined, we need to find the model that matches each selected route
            for selected_route in valid_routes {
                let model_name: Option<String> = usage_preferences
                    .iter()
@ -351,7 +443,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
                    .map(|pref| pref.model.clone());

                if let Some(model_name) = model_name {
-                    result.push((selected_route, model_name));
+                    routes.push((selected_route, model_name));
                } else {
                    warn!(
                        route = %selected_route,
@ -361,14 +453,13 @@ impl OrchestratorModel for OrchestratorModelV1 {
                }
            }
        } else {
-            // If no usage preferences are passed in request then use the default orchestration model preferences
            for selected_route in valid_routes {
                if let Some(model) = self
                    .agent_orchestration_to_model_map
                    .get(&selected_route)
                    .cloned()
                {
-                    result.push((selected_route, model));
+                    routes.push((selected_route, model));
                } else {
                    warn!(
                        route = %selected_route,
@ -379,11 +470,14 @@ impl OrchestratorModel for OrchestratorModelV1 {
            }
        }

-        if result.is_empty() {
+        if routes.is_empty() && selected_skills.is_empty() {
            return Ok(None);
        }

-        Ok(Some(result))
+        Ok(Some(OrchestratorSelection {
+            routes,
+            skills: selected_skills,
+        }))
    }

    fn get_model_name(&self) -> String {
@ -391,7 +485,11 @@ impl OrchestratorModel for OrchestratorModelV1 {
    }
 }

-fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
+fn generate_orchestrator_message(
+    prefs: &str,
+    skills_catalog: &str,
+    selected_conversation_list: &Vec<Message>,
+) -> String {
    // Format conversation with 4-space indentation (equivalent to Python's json.dumps(obj, indent=4))
    let formatter = serde_json::ser::PrettyFormatter::with_indent(b"    ");
    let mut conversation_buf = Vec::new();
@ -399,9 +497,19 @@ fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<M
    SerializeTrait::serialize(&selected_conversation_list, &mut serializer).unwrap();
    let conversation_json = String::from_utf8(conversation_buf).unwrap_or_default();

-    ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
+    let template = if skills_catalog.is_empty() {
+        ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
+    } else {
+        ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT_WITH_SKILLS
+    };
+
+    let mut out = template
        .replace("{routes}", prefs)
-        .replace("{conversation}", &conversation_json)
+        .replace("{conversation}", &conversation_json);
+    if !skills_catalog.is_empty() {
+        out = out.replace("{skills}", skills_catalog);
+    }
+    out
 }

 fn convert_to_orchestrator_preferences(
@ -1349,23 +1457,30 @@ If no routes are needed, return an empty list for `route`.
        let orchestrator =
            OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), 2000);

+        fn routes(pairs: &[(&str, &str)]) -> OrchestratorSelection {
+            OrchestratorSelection {
+                routes: pairs
+                    .iter()
+                    .map(|(r, m)| (r.to_string(), m.to_string()))
+                    .collect(),
+                skills: Vec::new(),
+            }
+        }
+
        // Case 1: Valid JSON with single route in array
        let input = r#"{"route": ["Image generation"]}"#;
        let result = orchestrator.parse_response(input, &None).unwrap();
-        assert_eq!(
-            result,
-            Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
-        );
+        assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));

        // Case 2: Valid JSON with multiple routes in array
        let input = r#"{"route": ["Image generation", "Code generation"]}"#;
        let result = orchestrator.parse_response(input, &None).unwrap();
        assert_eq!(
            result,
-            Some(vec![
-                ("Image generation".to_string(), "gpt-4o".to_string()),
-                ("Code generation".to_string(), "gpt-4o".to_string())
-            ])
+            Some(routes(&[
+                ("Image generation", "gpt-4o"),
+                ("Code generation", "gpt-4o")
+            ]))
        );

        // Case 3: Valid JSON with empty array
@ -1396,14 +1511,65 @@ If no routes are needed, return an empty list for `route`.
        // Case 7: Single quotes and \n in JSON
        let input = "{'route': ['Image generation']}\\n";
        let result = orchestrator.parse_response(input, &None).unwrap();
-        assert_eq!(
-            result,
-            Some(vec![("Image generation".to_string(), "gpt-4o".to_string())])
-        );
+        assert_eq!(result, Some(routes(&[("Image generation", "gpt-4o")])));

        // Case 8: Array with unknown route (not in orchestrations map)
        let input = r#"{"route": ["Unknown route"]}"#;
        let result = orchestrator.parse_response(input, &None).unwrap();
        assert_eq!(result, None);
+
+        // Case 9: Routes plus selected skills are propagated through.
+        let input = r#"{"route": ["Image generation"], "skills": ["pdf-processing"]}"#;
+        let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
+        assert_eq!(result.routes.len(), 1);
+        assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
+
+        // Case 10: Skills-only selection (no routes) still surfaces as Some.
+        let input = r#"{"route": [], "skills": ["pdf-processing"]}"#;
+        let result = orchestrator.parse_response(input, &None).unwrap().unwrap();
+        assert!(result.routes.is_empty());
+        assert_eq!(result.skills, vec!["pdf-processing".to_string()]);
+    }
+
+    #[test]
+    fn test_system_prompt_with_skills_block() {
+        let orchestrator = OrchestratorModelV1::with_skills(
+            HashMap::from([(
+                "gpt-4o".to_string(),
+                vec![OrchestrationPreference {
+                    name: "Image generation".to_string(),
+                    description: "generating image".to_string(),
+                }],
+            )]),
+            vec![SkillRef {
+                name: "pdf-processing".to_string(),
+                description: "Extract structured data from PDFs.".to_string(),
+                path: None,
+                base_dir: None,
+                body: None,
+                scope: None,
+                compatibility: None,
+                license: None,
+                metadata: None,
+                allowed_tools: None,
+            }],
+            "test-model".to_string(),
+            usize::MAX,
+        );
+
+        let conversation: Vec<Message> = serde_json::from_str(
+            r#"[{"role": "user", "content": "extract the invoice totals from this pdf"}]"#,
+        )
+        .unwrap();
+        let req = orchestrator.generate_request(&conversation, &None);
+        let prompt = req.messages[0].content.extract_text();
+
+        assert!(prompt.contains("<skills>"));
+        assert!(prompt.contains("</skills>"));
+        assert!(prompt.contains(r#""name": "pdf-processing""#));
+        assert!(prompt.contains("Extract structured data from PDFs."));
+        // Response format documentation must mention the `skills` array
+        // so the orchestrator emits it.
+        assert!(prompt.contains(r#""skills""#));
    }
 }
--- a/crates/brightstaff/src/router/stress_tests.rs
+++ b/crates/brightstaff/src/router/stress_tests.rs
@ -33,6 +33,7 @@ mod tests {
                    "openai/gpt-4o".to_string(),
                    "openai/gpt-4o-mini".to_string(),
                ],
+                skills: None,
                selection_policy: SelectionPolicy {
                    prefer: SelectionPreference::None,
                },
@ -44,6 +45,7 @@ mod tests {
                    "anthropic/claude-3-sonnet".to_string(),
                    "openai/gpt-4o-mini".to_string(),
                ],
+                skills: None,
                selection_policy: SelectionPolicy {
                    prefer: SelectionPreference::None,
                },
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -163,6 +163,12 @@ pub struct TopLevelRoutingPreference {
    pub name: String,
    pub description: String,
    pub models: Vec<String>,
+    /// Agent Skills associated with this route. When Plano-Orchestrator
+    /// selects this route, every skill listed here is also offered to the
+    /// orchestrator in the `<skills>` block; selected skills have their
+    /// SKILL.md body prepended to the upstream system prompt.
+    #[serde(default)]
+    pub skills: Option<Vec<String>>,
    #[serde(default)]
    pub selection_policy: SelectionPolicy,
 }
@ -224,6 +230,17 @@ pub struct Configuration {
    pub state_storage: Option<StateStorageConfig>,
    pub routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
    pub model_metrics_sources: Option<Vec<MetricsSource>>,
+    /// Agent Skills (https://agentskills.io) installed for this project.
+    ///
+    /// The Plano CLI discovers `.plano/skills/<name>/SKILL.md` files at render
+    /// time and materializes them into this list with `body` already loaded so
+    /// downstream consumers do not need filesystem access. Skills are scoped
+    /// to specific routes via `routing_preferences[].skills`; Plano-Orchestrator
+    /// receives a `<skills>` block alongside `<routes>` for any skills attached
+    /// to candidate routes, and selected skills have their SKILL.md body
+    /// injected into the upstream system prompt.
+    #[serde(default)]
+    pub skills: Option<Vec<SkillRef>>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -611,6 +628,45 @@ pub struct PromptTarget {
    pub auto_llm_dispatch_on_response: Option<bool>,
 }

+/// An Agent Skill (https://agentskills.io) as materialized by the Plano CLI.
+///
+/// At runtime brightstaff and the WASM filters reason over the catalog
+/// (`name` + `description`) and, when a skill is selected, inject the
+/// pre-loaded `body` into the downstream system prompt.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct SkillRef {
+    pub name: String,
+    pub description: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub path: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub base_dir: Option<String>,
+    /// Full SKILL.md markdown body (post-frontmatter). Inlined here at render
+    /// time so the WASM sandbox does not need filesystem access.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub body: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub scope: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub compatibility: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub license: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub allowed_tools: Option<String>,
+}
+
+impl SkillRef {
+    /// Best-effort short summary suitable for the `<skills>` block sent to
+    /// Plano-Orchestrator: only the public-facing description, never the
+    /// full SKILL.md body. The body is injected separately, after a skill
+    /// has been selected.
+    pub fn catalog_description(&self) -> &str {
+        &self.description
+    }
+}
+
 // convert PromptTarget to ChatCompletionTool
 impl From<&PromptTarget> for ChatCompletionTool {
    fn from(val: &PromptTarget) -> Self {
@ -807,4 +863,34 @@ disable_signals: false
        let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
        assert_eq!(overrides.disable_signals, None);
    }
+
+    #[test]
+    fn test_top_level_routing_preference_skills_deserialize() {
+        let yaml = r#"
+name: code review
+description: reviewing, analyzing, and suggesting improvements to existing code
+models:
+  - openai/gpt-4o
+skills:
+  - code-review-skill
+"#;
+        let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
+        assert_eq!(pref.name, "code review");
+        assert_eq!(
+            pref.skills.as_deref(),
+            Some(&["code-review-skill".to_string()][..])
+        );
+    }
+
+    #[test]
+    fn test_top_level_routing_preference_skills_optional() {
+        let yaml = r#"
+name: code generation
+description: generating new code
+models:
+  - openai/gpt-4o
+"#;
+        let pref: super::TopLevelRoutingPreference = serde_yaml::from_str(yaml).unwrap();
+        assert!(pref.skills.is_none());
+    }
 }
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@ -8,6 +8,7 @@ pub mod path;
 pub mod pii;
 pub mod ratelimit;
 pub mod routing;
+pub mod skills_runtime;
 pub mod stats;
 pub mod tokenizer;
 pub mod traces;
--- a/crates/common/src/skills_runtime.rs
+++ b/crates/common/src/skills_runtime.rs
@ -0,0 +1,415 @@
+//! Runtime helpers for handling Agent Skills selected by Plano-Orchestrator.
+//!
+//! These functions live in `common` (rather than `brightstaff` or a WASM
+//! crate) so they can be unit-tested on the native target without dragging
+//! in proxy-wasm host-call symbols or tokio runtime dependencies.
+
+use std::collections::{HashMap, HashSet};
+
+use crate::configuration::{SkillRef, TopLevelRoutingPreference};
+
+/// Upper bound on the byte length of a single skill body the runtime will
+/// inject into an upstream system prompt. SKILL.md files are typically a few
+/// kilobytes; this guard keeps a single oversized or malicious skill from
+/// blowing the downstream model's context window. Bodies longer than this
+/// are tail-trimmed with a marker line. ~32 KiB ≈ 8K tokens at the
+/// 4-bytes-per-token heuristic used elsewhere in brightstaff.
+pub const MAX_SKILL_BODY_BYTES: usize = 32 * 1024;
+
+const SKILL_BODY_TRUNCATION_MARKER: &str = "\n…[skill body truncated]\n";
+
+/// Outcome of resolving a list of orchestrator-selected skill names against
+/// a route's `skills:` allow-list and the runtime catalog. Callers should
+/// emit `warn!` for each name in `dropped_not_allowed` / `dropped_unknown`
+/// so misconfigured allow-lists and hallucinated picks are observable.
+#[derive(Debug, Default)]
+pub struct SkillResolution<'a> {
+    /// Skills that survived both the allow-list and catalog filters, in
+    /// orchestrator-selected order with duplicates removed.
+    pub activated: Vec<&'a SkillRef>,
+    /// Names the orchestrator selected that are NOT in the chosen route's
+    /// `skills:` allow-list. Typically a cross-route mention — the model
+    /// pulled a skill name from the global catalog that this route did not
+    /// expose. Callers should `warn!`.
+    pub dropped_not_allowed: Vec<String>,
+    /// Names that ARE allow-listed for the route but are missing from the
+    /// runtime catalog (skill removed / never installed / hallucinated).
+    pub dropped_unknown: Vec<String>,
+}
+
+/// Build the orchestrator-visible skills catalog from the union of every
+/// skill name referenced under `routing_preferences[].skills`. Skills not
+/// referenced by any route are excluded — they would just clutter the
+/// `<skills>` block with no way for the orchestrator to attach them. The
+/// result preserves `catalog` order and is deduplicated by name.
+pub fn referenced_skills_catalog(
+    catalog: &[SkillRef],
+    routes: &HashMap<String, TopLevelRoutingPreference>,
+) -> Vec<SkillRef> {
+    let mut referenced: HashSet<&str> = HashSet::new();
+    for route in routes.values() {
+        if let Some(names) = route.skills.as_ref() {
+            for name in names {
+                referenced.insert(name.as_str());
+            }
+        }
+    }
+
+    let mut out: Vec<SkillRef> = Vec::new();
+    let mut seen: HashSet<String> = HashSet::new();
+    for skill in catalog {
+        if referenced.contains(skill.name.as_str()) && seen.insert(skill.name.clone()) {
+            out.push(skill.clone());
+        }
+    }
+    out
+}
+
+/// Filter `selected` skill names to those that are both (a) allow-listed
+/// for the chosen route via `route_allowlist` and (b) present in `catalog`,
+/// preserving orchestrator order and deduplicating. Drops are reported on
+/// the `SkillResolution` struct so callers can `warn!` and surface
+/// misconfiguration without re-walking the lists.
+pub fn resolve_for_route<'a>(
+    catalog: &'a [SkillRef],
+    route_allowlist: &[String],
+    selected: &[String],
+) -> SkillResolution<'a> {
+    let allowed: HashSet<&str> = route_allowlist.iter().map(String::as_str).collect();
+    let mut activated: Vec<&SkillRef> = Vec::with_capacity(selected.len());
+    let mut taken: HashSet<&str> = HashSet::new();
+    let mut dropped_not_allowed: Vec<String> = Vec::new();
+    let mut dropped_unknown: Vec<String> = Vec::new();
+    for name in selected {
+        if !taken.insert(name.as_str()) {
+            continue;
+        }
+        if !allowed.contains(name.as_str()) {
+            dropped_not_allowed.push(name.clone());
+            continue;
+        }
+        match catalog.iter().find(|s| &s.name == name) {
+            Some(skill) => activated.push(skill),
+            None => dropped_unknown.push(name.clone()),
+        }
+    }
+    SkillResolution {
+        activated,
+        dropped_not_allowed,
+        dropped_unknown,
+    }
+}
+
+/// Resolve a list of orchestrator-selected skill names to their `SkillRef`s
+/// directly against the catalog, without any per-route allow-list. Use this
+/// for the "skills-only" path documented in `docs/source/resources/skills.rst`
+/// where the orchestrator returns skills but no route — the catalog itself
+/// (already pre-filtered to skills referenced by SOME route via
+/// `referenced_skills_catalog`) is the effective allow-list. Unknown names
+/// are dropped silently; results are deduplicated by name preserving order.
+pub fn resolve_selected_skills<'a>(
+    skills: &'a [SkillRef],
+    selected_names: &[String],
+) -> Vec<&'a SkillRef> {
+    let mut out: Vec<&SkillRef> = Vec::with_capacity(selected_names.len());
+    let mut seen: HashSet<&str> = HashSet::new();
+    for name in selected_names {
+        if !seen.insert(name.as_str()) {
+            continue;
+        }
+        if let Some(skill) = skills.iter().find(|s| &s.name == name) {
+            out.push(skill);
+        }
+    }
+    out
+}
+
+/// Append the bodies of activated skills to a system prompt, wrapped in
+/// `<skill_content name="..." [base_dir="..."]>…</skill_content>` tags so a
+/// future context-management pass can recognize and recompact them.
+///
+/// Behavior contract (relied on by `brightstaff::handlers::llm::model_selection`):
+///
+/// * Returns `None` only when no base prompt was supplied **and** no skills
+///   were activated. Otherwise always returns `Some`.
+/// * The base prompt (if any) is kept verbatim and the skill block is
+///   appended after a blank line.
+/// * Each skill body is tail-trimmed at `MAX_SKILL_BODY_BYTES` bytes (UTF-8
+///   boundary safe) with a truncation marker, so a single oversized
+///   SKILL.md cannot blow the downstream context window.
+/// * `name` and `base_dir` are XML-attribute-escaped (`&`, `<`, `>`, `"`)
+///   so a maliciously named skill cannot break out of the wrapper. Skill
+///   names are already validated upstream, but defense-in-depth matters
+///   here because the wrapper is part of the LLM's system prompt.
+pub fn augment_system_prompt_with_skills(
+    base_system_prompt: Option<String>,
+    activated_skills: &[&SkillRef],
+) -> Option<String> {
+    if activated_skills.is_empty() {
+        return base_system_prompt;
+    }
+    let mut buf = String::new();
+    if let Some(base) = base_system_prompt {
+        if !base.is_empty() {
+            buf.push_str(&base);
+            buf.push('\n');
+            buf.push('\n');
+        }
+    }
+    buf.push_str(
+        "The following Agent Skills have been activated for this request. \
+         Follow their instructions when relevant; resolve relative paths \
+         against each skill's base directory.\n\n",
+    );
+    for skill in activated_skills {
+        buf.push_str(&format!(
+            "<skill_content name=\"{}\"",
+            xml_attr_escape(&skill.name)
+        ));
+        if let Some(base_dir) = skill.base_dir.as_deref() {
+            buf.push_str(&format!(" base_dir=\"{}\"", xml_attr_escape(base_dir)));
+        }
+        buf.push_str(">\n");
+        if let Some(body) = skill.body.as_deref() {
+            buf.push_str(&truncate_skill_body(body));
+            buf.push('\n');
+        } else {
+            buf.push_str(&format!(
+                "(skill description) {}\n",
+                xml_attr_escape(&skill.description)
+            ));
+        }
+        buf.push_str("</skill_content>\n\n");
+    }
+    Some(buf.trim_end().to_string())
+}
+
+/// Escape a string for use inside an XML attribute value (double-quoted).
+/// Quotes `&`, `<`, `>`, and `"`; leaves single quotes alone since the
+/// wrapper always uses double quotes.
+fn xml_attr_escape(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    for ch in s.chars() {
+        match ch {
+            '&' => out.push_str("&amp;"),
+            '<' => out.push_str("&lt;"),
+            '>' => out.push_str("&gt;"),
+            '"' => out.push_str("&quot;"),
+            _ => out.push(ch),
+        }
+    }
+    out
+}
+
+/// Tail-trim `body` to at most `MAX_SKILL_BODY_BYTES` bytes, respecting
+/// UTF-8 character boundaries. Appends a marker so the downstream model
+/// can tell content was dropped. Pass-through for short bodies.
+fn truncate_skill_body(body: &str) -> String {
+    let trimmed = body.trim_end();
+    if trimmed.len() <= MAX_SKILL_BODY_BYTES {
+        return trimmed.to_string();
+    }
+    // Reserve room for the marker so the final length is still within the
+    // budget even when the marker is added.
+    let budget = MAX_SKILL_BODY_BYTES.saturating_sub(SKILL_BODY_TRUNCATION_MARKER.len());
+    let mut end = budget;
+    while end > 0 && !trimmed.is_char_boundary(end) {
+        end -= 1;
+    }
+    let mut out = String::with_capacity(end + SKILL_BODY_TRUNCATION_MARKER.len());
+    out.push_str(&trimmed[..end]);
+    out.push_str(SKILL_BODY_TRUNCATION_MARKER);
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::configuration::SelectionPolicy;
+
+    fn skill(name: &str, body: &str) -> SkillRef {
+        SkillRef {
+            name: name.to_string(),
+            description: format!("desc for {}", name),
+            path: Some(format!("/skills/{}/SKILL.md", name)),
+            base_dir: Some(format!("/skills/{}", name)),
+            body: Some(body.to_string()),
+            scope: Some("project".to_string()),
+            compatibility: None,
+            license: None,
+            metadata: None,
+            allowed_tools: None,
+        }
+    }
+
+    fn route(name: &str, skill_names: Option<Vec<&str>>) -> TopLevelRoutingPreference {
+        TopLevelRoutingPreference {
+            name: name.to_string(),
+            description: format!("desc for {}", name),
+            models: vec!["openai/gpt-4o".to_string()],
+            skills: skill_names.map(|v| v.into_iter().map(String::from).collect()),
+            selection_policy: SelectionPolicy::default(),
+        }
+    }
+
+    fn routes_map(
+        routes: Vec<TopLevelRoutingPreference>,
+    ) -> HashMap<String, TopLevelRoutingPreference> {
+        routes.into_iter().map(|r| (r.name.clone(), r)).collect()
+    }
+
+    // --- referenced_skills_catalog ---
+
+    #[test]
+    fn referenced_catalog_is_union_across_routes() {
+        let catalog = vec![
+            skill("pdf", "extract"),
+            skill("code-review", "review"),
+            skill("never-used", "x"),
+        ];
+        let routes = routes_map(vec![
+            route("docs", Some(vec!["pdf"])),
+            route("review", Some(vec!["code-review"])),
+            route("other", None),
+        ]);
+        let out = referenced_skills_catalog(&catalog, &routes);
+        let names: Vec<_> = out.iter().map(|s| s.name.as_str()).collect();
+        assert!(names.contains(&"pdf"));
+        assert!(names.contains(&"code-review"));
+        assert!(!names.contains(&"never-used"));
+    }
+
+    #[test]
+    fn referenced_catalog_deduplicates_when_multiple_routes_share_a_skill() {
+        let catalog = vec![skill("pdf", "extract")];
+        let routes = routes_map(vec![
+            route("a", Some(vec!["pdf"])),
+            route("b", Some(vec!["pdf"])),
+        ]);
+        let out = referenced_skills_catalog(&catalog, &routes);
+        assert_eq!(out.len(), 1);
+    }
+
+    // --- resolve_for_route ---
+
+    #[test]
+    fn resolve_for_route_keeps_allowlisted_skills_in_orchestrator_order() {
+        let catalog = vec![skill("a", ""), skill("b", ""), skill("c", "")];
+        let allow = vec!["a".to_string(), "b".to_string(), "c".to_string()];
+        let selected = vec!["c".to_string(), "a".to_string()];
+        let r = resolve_for_route(&catalog, &allow, &selected);
+        let names: Vec<_> = r.activated.iter().map(|s| s.name.as_str()).collect();
+        assert_eq!(names, vec!["c", "a"]);
+        assert!(r.dropped_not_allowed.is_empty());
+        assert!(r.dropped_unknown.is_empty());
+    }
+
+    #[test]
+    fn resolve_for_route_drops_cross_route_skill_into_not_allowed() {
+        let catalog = vec![skill("pdf", ""), skill("payment", "")];
+        let allow = vec!["pdf".to_string()]; // route only allows pdf
+        let selected = vec!["pdf".to_string(), "payment".to_string()];
+        let r = resolve_for_route(&catalog, &allow, &selected);
+        assert_eq!(r.activated.len(), 1);
+        assert_eq!(r.activated[0].name, "pdf");
+        assert_eq!(r.dropped_not_allowed, vec!["payment".to_string()]);
+        assert!(r.dropped_unknown.is_empty());
+    }
+
+    #[test]
+    fn resolve_for_route_drops_hallucinated_skill_into_unknown() {
+        let catalog = vec![skill("pdf", "")];
+        let allow = vec!["pdf".to_string(), "imaginary".to_string()];
+        let selected = vec!["pdf".to_string(), "imaginary".to_string()];
+        let r = resolve_for_route(&catalog, &allow, &selected);
+        assert_eq!(r.activated.len(), 1);
+        assert_eq!(r.activated[0].name, "pdf");
+        assert!(r.dropped_not_allowed.is_empty());
+        assert_eq!(r.dropped_unknown, vec!["imaginary".to_string()]);
+    }
+
+    #[test]
+    fn resolve_for_route_deduplicates_repeats() {
+        let catalog = vec![skill("pdf", "")];
+        let allow = vec!["pdf".to_string()];
+        let selected = vec!["pdf".to_string(), "pdf".to_string(), "pdf".to_string()];
+        let r = resolve_for_route(&catalog, &allow, &selected);
+        assert_eq!(r.activated.len(), 1);
+    }
+
+    // --- resolve_selected_skills (skills-only path) ---
+
+    #[test]
+    fn resolve_selected_skills_drops_unknown_and_dedupes() {
+        let catalog = vec![
+            skill("pdf-processing", "extract"),
+            skill("code-review", "review"),
+        ];
+        let names = vec![
+            "code-review".to_string(),
+            "ghost".to_string(),
+            "code-review".to_string(),
+            "pdf-processing".to_string(),
+        ];
+        let resolved = resolve_selected_skills(&catalog, &names);
+        assert_eq!(resolved.len(), 2);
+        assert_eq!(resolved[0].name, "code-review");
+        assert_eq!(resolved[1].name, "pdf-processing");
+    }
+
+    // --- augment_system_prompt_with_skills ---
+
+    #[test]
+    fn augment_passthrough_with_no_skills() {
+        let augmented = augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[]);
+        assert_eq!(augmented.as_deref(), Some("you are helpful"));
+    }
+
+    #[test]
+    fn augment_includes_skill_bodies() {
+        let s = skill("pdf-processing", "extract text and tables");
+        let augmented =
+            augment_system_prompt_with_skills(Some("you are helpful".to_string()), &[&s])
+                .expect("augmented");
+        assert!(augmented.starts_with("you are helpful"));
+        assert!(augmented.contains("<skill_content name=\"pdf-processing\""));
+        assert!(augmented.contains("extract text and tables"));
+        assert!(augmented.contains("base_dir=\"/skills/pdf-processing\""));
+    }
+
+    #[test]
+    fn augment_without_base_prompt_still_works() {
+        let s = skill("code-review", "look at diffs");
+        let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
+        assert!(augmented.contains("<skill_content name=\"code-review\""));
+        assert!(augmented.contains("look at diffs"));
+    }
+
+    #[test]
+    fn augment_xml_escapes_skill_name_and_base_dir() {
+        let mut s = skill("safe-name", "body");
+        s.name = "bad\"name".to_string();
+        s.base_dir = Some("/path/with\"quote".to_string());
+        let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
+        // Raw double-quote must NOT appear inside the attribute value — only
+        // its escaped form. Otherwise it would close the attribute and let a
+        // skill name inject arbitrary attributes / break out of the wrapper.
+        assert!(augmented.contains("name=\"bad&quot;name\""));
+        assert!(augmented.contains("base_dir=\"/path/with&quot;quote\""));
+    }
+
+    #[test]
+    fn augment_truncates_oversized_skill_body() {
+        let big_body: String = "a".repeat(MAX_SKILL_BODY_BYTES * 2);
+        let s = skill("huge", &big_body);
+        let augmented = augment_system_prompt_with_skills(None, &[&s]).expect("augmented");
+        // Truncation marker is present, so the body did NOT pass through verbatim.
+        assert!(augmented.contains("[skill body truncated]"));
+        // And the body slice cannot be longer than MAX_SKILL_BODY_BYTES + a
+        // little wrapper overhead — definitely not 2× the cap.
+        let body_section_end = augmented.find("</skill_content>").unwrap();
+        let body_section_start = augmented.find(">\n").unwrap() + 2;
+        let body_len = body_section_end - body_section_start;
+        assert!(body_len <= MAX_SKILL_BODY_BYTES + 64);
+    }
+}
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@ -209,19 +209,15 @@ impl StreamContext {
            } else {
                info!("no default prompt target found, forwarding request to upstream llm");
                let mut messages = Vec::new();
-                // add system prompt
-                match self.system_prompt.as_ref() {
-                    None => {}
-                    Some(system_prompt) => {
-                        let system_prompt_message = Message {
-                            role: SYSTEM_ROLE.to_string(),
-                            content: Some(ContentType::Text(system_prompt.clone())),
-                            model: None,
-                            tool_calls: None,
-                            tool_call_id: None,
-                        };
-                        messages.push(system_prompt_message);
-                    }
+                if let Some(system_prompt) = self.system_prompt.as_ref().clone() {
+                    let system_prompt_message = Message {
+                        role: SYSTEM_ROLE.to_string(),
+                        content: Some(ContentType::Text(system_prompt)),
+                        model: None,
+                        tool_calls: None,
+                        tool_call_id: None,
+                    };
+                    messages.push(system_prompt_message);
                }

                messages.append(