mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
add overrides.disable_signals to skip CPU-heavy signal analysis (#906)
This commit is contained in:
parent
22f332f62d
commit
6701195a5d
7 changed files with 46 additions and 1 deletions
|
|
@ -278,6 +278,9 @@ properties:
|
||||||
type: boolean
|
type: boolean
|
||||||
use_agent_orchestrator:
|
use_agent_orchestrator:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
disable_signals:
|
||||||
|
type: boolean
|
||||||
|
description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
|
||||||
upstream_connect_timeout:
|
upstream_connect_timeout:
|
||||||
type: string
|
type: string
|
||||||
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
||||||
|
|
|
||||||
|
|
@ -24,4 +24,7 @@ pub struct AppState {
|
||||||
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
||||||
pub http_client: reqwest::Client,
|
pub http_client: reqwest::Client,
|
||||||
pub filter_pipeline: Arc<FilterPipeline>,
|
pub filter_pipeline: Arc<FilterPipeline>,
|
||||||
|
/// When false, agentic signal analysis is skipped on LLM responses to save CPU.
|
||||||
|
/// Controlled by `overrides.disable_signals` in plano config.
|
||||||
|
pub signals_enabled: bool,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,7 @@ async fn llm_chat_inner(
|
||||||
&request_path,
|
&request_path,
|
||||||
&state.model_aliases,
|
&state.model_aliases,
|
||||||
&state.llm_providers,
|
&state.llm_providers,
|
||||||
|
state.signals_enabled,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|
@ -408,6 +409,7 @@ async fn parse_and_validate_request(
|
||||||
request_path: &str,
|
request_path: &str,
|
||||||
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
||||||
llm_providers: &Arc<RwLock<LlmProviders>>,
|
llm_providers: &Arc<RwLock<LlmProviders>>,
|
||||||
|
signals_enabled: bool,
|
||||||
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
||||||
let raw_bytes = request
|
let raw_bytes = request
|
||||||
.collect()
|
.collect()
|
||||||
|
|
@ -486,7 +488,11 @@ async fn parse_and_validate_request(
|
||||||
let user_message_preview = client_request
|
let user_message_preview = client_request
|
||||||
.get_recent_user_message()
|
.get_recent_user_message()
|
||||||
.map(|msg| truncate_message(&msg, 50));
|
.map(|msg| truncate_message(&msg, 50));
|
||||||
let messages_for_signals = Some(client_request.get_messages());
|
let messages_for_signals = if signals_enabled {
|
||||||
|
Some(client_request.get_messages())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// Set the upstream model name and strip routing metadata
|
// Set the upstream model name and strip routing metadata
|
||||||
client_request.set_model(model_name_only.clone());
|
client_request.set_model(model_name_only.clone());
|
||||||
|
|
|
||||||
|
|
@ -328,6 +328,8 @@ async fn init_app_state(
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.and_then(|tracing| tracing.span_attributes.clone());
|
.and_then(|tracing| tracing.span_attributes.clone());
|
||||||
|
|
||||||
|
let signals_enabled = !overrides.disable_signals.unwrap_or(false);
|
||||||
|
|
||||||
Ok(AppState {
|
Ok(AppState {
|
||||||
orchestrator_service,
|
orchestrator_service,
|
||||||
model_aliases: config.model_aliases.clone(),
|
model_aliases: config.model_aliases.clone(),
|
||||||
|
|
@ -339,6 +341,7 @@ async fn init_app_state(
|
||||||
span_attributes,
|
span_attributes,
|
||||||
http_client: reqwest::Client::new(),
|
http_client: reqwest::Client::new(),
|
||||||
filter_pipeline,
|
filter_pipeline,
|
||||||
|
signals_enabled,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -234,6 +234,7 @@ pub struct Overrides {
|
||||||
pub llm_routing_model: Option<String>,
|
pub llm_routing_model: Option<String>,
|
||||||
pub agent_orchestration_model: Option<String>,
|
pub agent_orchestration_model: Option<String>,
|
||||||
pub orchestrator_model_context_length: Option<usize>,
|
pub orchestrator_model_context_length: Option<usize>,
|
||||||
|
pub disable_signals: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||||
|
|
@ -750,4 +751,29 @@ mod test {
|
||||||
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
||||||
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A default-constructed `Overrides` must leave `disable_signals` unset.
#[test]
fn test_overrides_disable_signals_default_none() {
    let defaults = super::Overrides::default();
    assert!(defaults.disable_signals.is_none());
}
|
||||||
|
|
||||||
|
/// Deserializing `Overrides` from YAML yields `Some(true)` / `Some(false)` when
/// `disable_signals` is given explicitly, and `None` when the key is absent.
#[test]
fn test_overrides_disable_signals_deserialize() {
    let yaml_true = r#"
disable_signals: true
"#;
    let yaml_false = r#"
disable_signals: false
"#;
    let yaml_missing = "{}";

    // Same three inputs, checked in the same order, as one table.
    let cases = [
        (yaml_true, Some(true)),
        (yaml_false, Some(false)),
        (yaml_missing, None),
    ];

    for (input, expected) in cases {
        let parsed: super::Overrides = serde_yaml::from_str(input).unwrap();
        assert_eq!(parsed.disable_signals, expected);
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -173,6 +173,9 @@ overrides:
|
||||||
llm_routing_model: Plano-Orchestrator
|
llm_routing_model: Plano-Orchestrator
|
||||||
# Model used for agent orchestration (must be listed in model_providers)
|
# Model used for agent orchestration (must be listed in model_providers)
|
||||||
agent_orchestration_model: Plano-Orchestrator
|
agent_orchestration_model: Plano-Orchestrator
|
||||||
|
# Disable agentic signal analysis (frustration, repetition, escalation, etc.)
|
||||||
|
# on LLM responses to save CPU. Default: false.
|
||||||
|
disable_signals: false
|
||||||
|
|
||||||
# Model affinity — pin routing decisions for agentic loops
|
# Model affinity — pin routing decisions for agentic loops
|
||||||
routing:
|
routing:
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,7 @@ model_providers:
|
||||||
provider_interface: plano
|
provider_interface: plano
|
||||||
overrides:
|
overrides:
|
||||||
agent_orchestration_model: Plano-Orchestrator
|
agent_orchestration_model: Plano-Orchestrator
|
||||||
|
disable_signals: false
|
||||||
llm_routing_model: Plano-Orchestrator
|
llm_routing_model: Plano-Orchestrator
|
||||||
optimize_context_window: true
|
optimize_context_window: true
|
||||||
prompt_target_intent_matching_threshold: 0.7
|
prompt_target_intent_matching_threshold: 0.7
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue