mirror of
https://github.com/katanemo/plano.git
synced 2026-04-24 16:26:34 +02:00
add overrides.disable_signals to skip CPU-heavy signal analysis (#906)
This commit is contained in:
parent
22f332f62d
commit
6701195a5d
7 changed files with 46 additions and 1 deletions
|
|
@ -278,6 +278,9 @@ properties:
|
|||
type: boolean
|
||||
use_agent_orchestrator:
|
||||
type: boolean
|
||||
disable_signals:
|
||||
type: boolean
|
||||
description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
|
||||
upstream_connect_timeout:
|
||||
type: string
|
||||
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
||||
|
|
|
|||
|
|
@ -24,4 +24,7 @@ pub struct AppState {
|
|||
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
||||
pub http_client: reqwest::Client,
|
||||
pub filter_pipeline: Arc<FilterPipeline>,
|
||||
/// When false, agentic signal analysis is skipped on LLM responses to save CPU.
|
||||
/// Controlled by `overrides.disable_signals` in plano config (inverted: `disable_signals: true` sets this to `false`; unset means enabled).
|
||||
pub signals_enabled: bool,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -143,6 +143,7 @@ async fn llm_chat_inner(
|
|||
&request_path,
|
||||
&state.model_aliases,
|
||||
&state.llm_providers,
|
||||
state.signals_enabled,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
|
@ -408,6 +409,7 @@ async fn parse_and_validate_request(
|
|||
request_path: &str,
|
||||
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
||||
llm_providers: &Arc<RwLock<LlmProviders>>,
|
||||
signals_enabled: bool,
|
||||
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
||||
let raw_bytes = request
|
||||
.collect()
|
||||
|
|
@ -486,7 +488,11 @@ async fn parse_and_validate_request(
|
|||
let user_message_preview = client_request
|
||||
.get_recent_user_message()
|
||||
.map(|msg| truncate_message(&msg, 50));
|
||||
let messages_for_signals = Some(client_request.get_messages());
|
||||
let messages_for_signals = if signals_enabled {
|
||||
Some(client_request.get_messages())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Set the upstream model name and strip routing metadata
|
||||
client_request.set_model(model_name_only.clone());
|
||||
|
|
|
|||
|
|
@ -328,6 +328,8 @@ async fn init_app_state(
|
|||
.as_ref()
|
||||
.and_then(|tracing| tracing.span_attributes.clone());
|
||||
|
||||
let signals_enabled = !overrides.disable_signals.unwrap_or(false);
|
||||
|
||||
Ok(AppState {
|
||||
orchestrator_service,
|
||||
model_aliases: config.model_aliases.clone(),
|
||||
|
|
@ -339,6 +341,7 @@ async fn init_app_state(
|
|||
span_attributes,
|
||||
http_client: reqwest::Client::new(),
|
||||
filter_pipeline,
|
||||
signals_enabled,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -234,6 +234,7 @@ pub struct Overrides {
|
|||
pub llm_routing_model: Option<String>,
|
||||
pub agent_orchestration_model: Option<String>,
|
||||
pub orchestrator_model_context_length: Option<usize>,
|
||||
pub disable_signals: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -750,4 +751,29 @@ mod test {
|
|||
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
||||
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
||||
}
|
||||
|
||||
// A default-constructed `Overrides` must leave `disable_signals` unset (`None`)
// rather than defaulting it to an explicit `Some(false)`.
#[test]
|
||||
fn test_overrides_disable_signals_default_none() {
|
||||
let overrides = super::Overrides::default();
|
||||
assert_eq!(overrides.disable_signals, None);
|
||||
}
|
||||
|
||||
// Checks YAML deserialization of `Overrides.disable_signals` for the three
// cases: explicit `true`, explicit `false`, and key absent.
#[test]
|
||||
fn test_overrides_disable_signals_deserialize() {
|
||||
let yaml = r#"
|
||||
disable_signals: true
|
||||
"#;
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml).unwrap();
|
||||
assert_eq!(overrides.disable_signals, Some(true));
|
||||
|
||||
// An explicit `false` must round-trip as `Some(false)`, distinct from an absent key.
let yaml_false = r#"
|
||||
disable_signals: false
|
||||
"#;
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml_false).unwrap();
|
||||
assert_eq!(overrides.disable_signals, Some(false));
|
||||
|
||||
// An empty mapping (key omitted entirely) must deserialize to `None`.
let yaml_missing = "{}";
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
|
||||
assert_eq!(overrides.disable_signals, None);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,6 +173,9 @@ overrides:
|
|||
llm_routing_model: Plano-Orchestrator
|
||||
# Model used for agent orchestration (must be listed in model_providers)
|
||||
agent_orchestration_model: Plano-Orchestrator
|
||||
# Disable agentic signal analysis (frustration, repetition, escalation, etc.)
|
||||
# on LLM responses to save CPU. Default: false.
|
||||
disable_signals: false
|
||||
|
||||
# Model affinity — pin routing decisions for agentic loops
|
||||
routing:
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ model_providers:
|
|||
provider_interface: plano
|
||||
overrides:
|
||||
agent_orchestration_model: Plano-Orchestrator
|
||||
disable_signals: false
|
||||
llm_routing_model: Plano-Orchestrator
|
||||
optimize_context_window: true
|
||||
prompt_target_intent_matching_threshold: 0.7
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue