plano/crates/brightstaff/src/router/orchestrator.rs
Spherrrical a0f77fddec fix(skills): honor skills-only orchestrator decisions, dedupe runtime helpers, warn on dropped picks
Addresses the code-review findings on 7f5bf641:

- Honor skills-only decisions: RouteDecision.route_name is now Option<String> and the orchestrator emits a decision when routes is empty but skills is non-empty. The LLM handler falls back to the originally-requested model and still injects activated skill bodies, matching the contract in docs/source/resources/skills.rst.
- Warn on allow-list misses: resolve_for_route now returns a SkillResolution that splits drops into "not allow-listed for this route" vs "not in catalog (hallucinated)". brightstaff logs each bucket so misconfigured routing_preferences[].skills lists become visible instead of vanishing silently.
- Consolidate runtime: common::skills_runtime is now the single source of truth (referenced_skills_catalog, resolve_for_route, resolve_selected_skills, augment_system_prompt_with_skills). brightstaff drops its local re-implementations and calls into common.
- Tests: 11 new tests in common::skills_runtime (catalog union, allow-list intersection, dedup, hallucination handling, XML escaping, body size cap) and 6 new tests in brightstaff::handlers::llm::model_selection cover inject_activated_skills_into_request, including the first-system-message rule and the Parts->Text flatten — both now documented on the function.
- Cap skill body size at 32 KiB with a UTF-8-safe tail-trim + marker so an oversized SKILL.md cannot blow the downstream context window.
- XML-escape skill name and base_dir in the <skill_content> wrapper as defense-in-depth (names are validated upstream, but the wrapper sits inside the system prompt).
- Bound find_project_root at \$HOME plus a 30-parent depth cap so CLI invocations outside HOME no longer walk to /.
2026-06-22 12:41:06 -07:00

611 lines
22 KiB
Rust

use std::{borrow::Cow, collections::HashMap, sync::Arc, time::Duration};
use common::{
configuration::{
AgentUsagePreference, OrchestrationPreference, SkillRef, TopLevelRoutingPreference,
},
consts::{ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER},
skills_runtime::{referenced_skills_catalog, resolve_for_route, resolve_selected_skills},
};
use hermesllm::apis::openai::Message;
use hyper::header;
use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use thiserror::Error;
use tracing::{debug, info, warn};
use super::http::{self, post_and_extract_content};
use super::model_metrics::ModelMetricsService;
use super::orchestrator_model::{OrchestratorModel, OrchestratorSelection};
use crate::metrics as bs_metrics;
use crate::metrics::labels as metric_labels;
use crate::router::orchestrator_model_v1;
use crate::session_cache::SessionCache;
pub use crate::session_cache::CachedRoute;
const DEFAULT_SESSION_TTL_SECONDS: u64 = 600;
pub struct OrchestratorService {
orchestrator_url: String,
client: reqwest::Client,
orchestrator_model: Arc<dyn OrchestratorModel>,
orchestrator_provider_name: String,
top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
/// Agent Skills catalog (deduplicated by name) attached to any
/// `routing_preferences[].skills` list. Empty when no route has skills.
skills_catalog: Vec<SkillRef>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_cache: Option<Arc<dyn SessionCache>>,
session_ttl: Duration,
tenant_header: Option<String>,
}
/// Result of `determine_route`: which route was picked (if any), the
/// ranked candidate models for that route, and the Agent Skill bodies the
/// orchestrator chose to activate alongside it.
///
/// Two valid shapes:
///
/// * **Route + skills (typical):** `route_name = Some(...)`, `models`
/// non-empty, `activated_skills` may be non-empty. Skills are resolved
/// against `routing_preferences[<route>].skills`, so picks that aren't
/// allow-listed for the route are dropped with a `warn!`.
/// * **Skills-only:** `route_name = None`, `models` empty,
/// `activated_skills` non-empty. The orchestrator decided no route
/// needed to change but the user's intent matches one or more skills.
/// Per `docs/source/resources/skills.rst`, the request falls back to the
/// originally-requested model and the skill bodies are injected the
/// same way. Allow-list filtering uses the catalog union (effectively
/// the catalog itself, which is pre-filtered to skills referenced by
/// some route).
#[derive(Debug, Clone, Default)]
pub struct RouteDecision {
pub route_name: Option<String>,
pub models: Vec<String>,
pub activated_skills: Vec<SkillRef>,
}
#[derive(Debug, Error)]
pub enum OrchestrationError {
#[error(transparent)]
Http(#[from] http::HttpError),
#[error("Orchestrator model error: {0}")]
OrchestratorModelError(#[from] super::orchestrator_model::OrchestratorModelError),
}
pub type Result<T> = std::result::Result<T, OrchestrationError>;
impl OrchestratorService {
pub fn new(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
max_token_length: usize,
) -> Self {
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
HashMap::new(),
orchestration_model_name,
max_token_length,
));
OrchestratorService {
orchestrator_url,
client: reqwest::Client::new(),
orchestrator_model,
orchestrator_provider_name,
top_level_preferences: HashMap::new(),
skills_catalog: Vec::new(),
metrics_service: None,
session_cache: None,
session_ttl: Duration::from_secs(DEFAULT_SESSION_TTL_SECONDS),
tenant_header: None,
}
}
#[allow(clippy::too_many_arguments)]
pub fn with_routing(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_ttl_seconds: Option<u64>,
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
Self::with_routing_and_skills(
orchestrator_url,
orchestration_model_name,
orchestrator_provider_name,
top_level_prefs,
None,
metrics_service,
session_ttl_seconds,
session_cache,
tenant_header,
max_token_length,
)
}
/// Like `with_routing`, but also seeds the orchestrator with a catalog of
/// Agent Skills referenced by `routing_preferences[].skills`. The
/// orchestrator gets a `<skills>` block in its system prompt and may
/// select zero or more skills alongside the picked route; this enables
/// the LLM handler to inject the chosen SKILL.md bodies into the
/// upstream request.
#[allow(clippy::too_many_arguments)]
pub fn with_routing_and_skills(
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
top_level_prefs: Option<Vec<TopLevelRoutingPreference>>,
skills_catalog: Option<Vec<SkillRef>>,
metrics_service: Option<Arc<ModelMetricsService>>,
session_ttl_seconds: Option<u64>,
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
.map_or_else(HashMap::new, |prefs| {
prefs.into_iter().map(|p| (p.name.clone(), p)).collect()
});
let skills_catalog = referenced_skills_catalog(
skills_catalog.as_deref().unwrap_or(&[]),
&top_level_preferences,
);
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::with_skills(
HashMap::new(),
skills_catalog.clone(),
orchestration_model_name,
max_token_length,
));
let session_ttl =
Duration::from_secs(session_ttl_seconds.unwrap_or(DEFAULT_SESSION_TTL_SECONDS));
OrchestratorService {
orchestrator_url,
client: reqwest::Client::new(),
orchestrator_model,
orchestrator_provider_name,
top_level_preferences,
skills_catalog,
metrics_service,
session_cache: Some(session_cache),
session_ttl,
tenant_header,
}
}
// ---- Session cache methods ----
#[must_use]
pub fn tenant_header(&self) -> Option<&str> {
self.tenant_header.as_deref()
}
fn session_key<'a>(tenant_id: Option<&str>, session_id: &'a str) -> Cow<'a, str> {
match tenant_id {
Some(t) => Cow::Owned(format!("{t}:{session_id}")),
None => Cow::Borrowed(session_id),
}
}
pub async fn get_cached_route(
&self,
session_id: &str,
tenant_id: Option<&str>,
) -> Option<CachedRoute> {
let cache = self.session_cache.as_ref()?;
let result = cache.get(&Self::session_key(tenant_id, session_id)).await;
bs_metrics::record_session_cache_event(if result.is_some() {
metric_labels::SESSION_CACHE_HIT
} else {
metric_labels::SESSION_CACHE_MISS
});
result
}
pub async fn cache_route(
&self,
session_id: String,
tenant_id: Option<&str>,
model_name: String,
route_name: Option<String>,
) {
if let Some(ref cache) = self.session_cache {
cache
.put(
&Self::session_key(tenant_id, &session_id),
CachedRoute {
model_name,
route_name,
},
self.session_ttl,
)
.await;
bs_metrics::record_session_cache_event(metric_labels::SESSION_CACHE_STORE);
}
}
// ---- LLM routing ----
pub async fn determine_route(
&self,
messages: &[Message],
inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
request_id: &str,
) -> Result<Option<RouteDecision>> {
if messages.is_empty() {
return Ok(None);
}
let inline_top_map: Option<HashMap<String, TopLevelRoutingPreference>> =
inline_routing_preferences
.map(|prefs| prefs.into_iter().map(|p| (p.name.clone(), p)).collect());
if inline_top_map.is_none() && self.top_level_preferences.is_empty() {
return Ok(None);
}
let effective_source = inline_top_map
.as_ref()
.unwrap_or(&self.top_level_preferences);
let effective_prefs: Vec<AgentUsagePreference> = effective_source
.values()
.map(|p| AgentUsagePreference {
model: p.models.first().cloned().unwrap_or_default(),
orchestration_preferences: vec![OrchestrationPreference {
name: p.name.clone(),
description: p.description.clone(),
}],
})
.collect();
let orchestration_result = self
.determine_orchestration(
messages,
Some(effective_prefs),
Some(request_id.to_string()),
)
.await?;
let result = if let Some(ref selection) = orchestration_result {
if selection.routes.len() > 1 {
let all_routes: Vec<&str> = selection
.routes
.iter()
.map(|(name, _)| name.as_str())
.collect();
info!(
routes = ?all_routes,
using = %all_routes.first().unwrap_or(&"none"),
"plano-orchestrator detected multiple intents, using first"
);
}
if let Some((route_name, _)) = selection.routes.first() {
// Route + (optional) skills path.
let top_pref = inline_top_map
.as_ref()
.and_then(|m| m.get(route_name))
.or_else(|| self.top_level_preferences.get(route_name));
if let Some(pref) = top_pref {
let ranked = match &self.metrics_service {
Some(svc) => svc.rank_models(&pref.models, &pref.selection_policy).await,
None => pref.models.clone(),
};
let resolution = resolve_for_route(
&self.skills_catalog,
pref.skills.as_deref().unwrap_or(&[]),
&selection.skills,
);
log_skill_drops(route_name, &resolution);
let activated_skills: Vec<SkillRef> =
resolution.activated.into_iter().cloned().collect();
Some(RouteDecision {
route_name: Some(route_name.clone()),
models: ranked,
activated_skills,
})
} else {
None
}
} else if !selection.skills.is_empty() {
// Skills-only path: orchestrator picked no route but flagged
// skills. Per the documented contract the request still goes
// through with the originally-requested model and the skill
// bodies are injected. The catalog itself is the effective
// allow-list (it's already the union across every route's
// allow-list, so anything in it was deemed safe to expose).
let activated: Vec<SkillRef> =
resolve_selected_skills(&self.skills_catalog, &selection.skills)
.into_iter()
.cloned()
.collect();
if activated.is_empty() {
None
} else {
Some(RouteDecision {
route_name: None,
models: Vec::new(),
activated_skills: activated,
})
}
} else {
None
}
} else {
None
};
info!(
selected_route = ?result.as_ref().map(|r| (&r.route_name, r.models.first(), r.activated_skills.iter().map(|s| s.name.as_str()).collect::<Vec<_>>())),
"plano-orchestrator determined route"
);
Ok(result)
}
// ---- Agent orchestration (existing) ----
pub async fn determine_orchestration(
&self,
messages: &[Message],
usage_preferences: Option<Vec<AgentUsagePreference>>,
request_id: Option<String>,
) -> Result<Option<OrchestratorSelection>> {
if messages.is_empty() {
return Ok(None);
}
if usage_preferences
.as_ref()
.is_none_or(|prefs| prefs.is_empty())
{
return Ok(None);
}
let orchestrator_request = self
.orchestrator_model
.generate_request(messages, &usage_preferences);
debug!(
model = %self.orchestrator_model.get_model_name(),
endpoint = %self.orchestrator_url,
"sending request to plano-orchestrator"
);
let body = serde_json::to_string(&orchestrator_request)
.map_err(super::orchestrator_model::OrchestratorModelError::from)?;
debug!(body = %body, "plano-orchestrator request");
let mut headers = header::HeaderMap::new();
headers.insert(
header::CONTENT_TYPE,
header::HeaderValue::from_static("application/json"),
);
headers.insert(
header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
header::HeaderValue::from_str(&self.orchestrator_provider_name)
.unwrap_or_else(|_| header::HeaderValue::from_static("plano-orchestrator")),
);
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
});
if let Some(ref request_id) = request_id {
if let Ok(val) = header::HeaderValue::from_str(request_id) {
headers.insert(header::HeaderName::from_static(REQUEST_ID_HEADER), val);
}
}
headers.insert(
header::HeaderName::from_static("model"),
header::HeaderValue::from_static("plano-orchestrator"),
);
let Some((content, elapsed)) =
post_and_extract_content(&self.client, &self.orchestrator_url, headers, body).await?
else {
return Ok(None);
};
let parsed = self
.orchestrator_model
.parse_response(&content, &usage_preferences)?;
info!(
content = %content.replace("\n", "\\n"),
selected_routes = ?parsed,
response_time_ms = elapsed.as_millis(),
"plano-orchestrator determined routes"
);
Ok(parsed)
}
}
/// Emit `warn!` for any skill names the orchestrator selected but the
/// resolver dropped. Surfacing these is critical for debuggability — a
/// silently-dropped skill is hard to diagnose, and the most common causes
/// (forgetting to add a skill to a route's allow-list, or the orchestrator
/// hallucinating a name) are both fixable once visible.
fn log_skill_drops(route_name: &str, resolution: &common::skills_runtime::SkillResolution<'_>) {
if !resolution.dropped_not_allowed.is_empty() {
warn!(
route = %route_name,
skills = ?resolution.dropped_not_allowed,
"orchestrator selected Agent Skills that are not on this route's allow-list; \
dropping (add them to routing_preferences[].skills if you want this route to use them)"
);
}
if !resolution.dropped_unknown.is_empty() {
warn!(
route = %route_name,
skills = ?resolution.dropped_unknown,
"orchestrator selected Agent Skills that are not in the runtime catalog \
(likely hallucinated or removed)"
);
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::session_cache::memory::MemorySessionCache;
fn make_orchestrator_service(ttl_seconds: u64, max_entries: usize) -> OrchestratorService {
let session_cache = Arc::new(MemorySessionCache::new(max_entries));
OrchestratorService::with_routing(
"http://localhost:12001/v1/chat/completions".to_string(),
"Plano-Orchestrator".to_string(),
"plano-orchestrator".to_string(),
None,
None,
Some(ttl_seconds),
session_cache,
None,
orchestrator_model_v1::MAX_TOKEN_LEN,
)
}
#[tokio::test]
async fn test_cache_miss_returns_none() {
let svc = make_orchestrator_service(600, 100);
assert!(svc
.get_cached_route("unknown-session", None)
.await
.is_none());
}
#[tokio::test]
async fn test_cache_hit_returns_cached_route() {
let svc = make_orchestrator_service(600, 100);
svc.cache_route(
"s1".to_string(),
None,
"gpt-4o".to_string(),
Some("code".to_string()),
)
.await;
let cached = svc.get_cached_route("s1", None).await.unwrap();
assert_eq!(cached.model_name, "gpt-4o");
assert_eq!(cached.route_name, Some("code".to_string()));
}
#[tokio::test]
async fn test_cache_expired_entry_returns_none() {
let svc = make_orchestrator_service(0, 100);
svc.cache_route("s1".to_string(), None, "gpt-4o".to_string(), None)
.await;
assert!(svc.get_cached_route("s1", None).await.is_none());
}
#[tokio::test]
async fn test_expired_entries_not_returned() {
let svc = make_orchestrator_service(0, 100);
svc.cache_route("s1".to_string(), None, "gpt-4o".to_string(), None)
.await;
svc.cache_route("s2".to_string(), None, "claude".to_string(), None)
.await;
assert!(svc.get_cached_route("s1", None).await.is_none());
assert!(svc.get_cached_route("s2", None).await.is_none());
}
#[tokio::test]
async fn test_cache_evicts_oldest_when_full() {
let svc = make_orchestrator_service(600, 2);
svc.cache_route("s1".to_string(), None, "model-a".to_string(), None)
.await;
tokio::time::sleep(Duration::from_millis(10)).await;
svc.cache_route("s2".to_string(), None, "model-b".to_string(), None)
.await;
svc.cache_route("s3".to_string(), None, "model-c".to_string(), None)
.await;
assert!(svc.get_cached_route("s1", None).await.is_none());
assert!(svc.get_cached_route("s2", None).await.is_some());
assert!(svc.get_cached_route("s3", None).await.is_some());
}
// ---- RouteDecision construction ----
fn skill_ref(name: &str) -> SkillRef {
SkillRef {
name: name.to_string(),
description: format!("desc for {name}"),
path: None,
base_dir: None,
body: Some(format!("body for {name}")),
scope: Some("project".to_string()),
compatibility: None,
license: None,
metadata: None,
allowed_tools: None,
}
}
#[test]
fn route_decision_holds_optional_route_name_for_skills_only_path() {
// Regression guard for the docs promise at skills.rst:153-155: a
// skills-only decision must be representable, with no route_name and
// empty models, so the LLM handler falls back to the original model.
let decision = RouteDecision {
route_name: None,
models: Vec::new(),
activated_skills: vec![skill_ref("pdf")],
};
assert!(decision.route_name.is_none());
assert!(decision.models.is_empty());
assert_eq!(decision.activated_skills.len(), 1);
}
#[test]
fn log_skill_drops_does_not_panic_on_empty_resolution() {
// The logger is fire-and-forget. We can't easily assert on the
// emitted warns here without setting up a tracing subscriber, so the
// contract under test is: empty resolutions are silent (no warn
// attempt). Confidence in the warn paths comes from
// common::skills_runtime tests for resolve_for_route, which is the
// function whose dropped_* lists drive this logger.
let empty = common::skills_runtime::SkillResolution::default();
log_skill_drops("any", &empty);
}
#[tokio::test]
async fn test_cache_update_existing_session_does_not_evict() {
let svc = make_orchestrator_service(600, 2);
svc.cache_route("s1".to_string(), None, "model-a".to_string(), None)
.await;
svc.cache_route("s2".to_string(), None, "model-b".to_string(), None)
.await;
svc.cache_route(
"s1".to_string(),
None,
"model-a-updated".to_string(),
Some("route".to_string()),
)
.await;
let s1 = svc.get_cached_route("s1", None).await.unwrap();
assert_eq!(s1.model_name, "model-a-updated");
assert!(svc.get_cached_route("s2", None).await.is_some());
}
}