diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index a82449d8..d3d6a643 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -288,6 +288,9 @@ properties: agent_orchestration_model: type: string description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers." + orchestrator_model_context_length: + type: integer + description: "Maximum token length for the orchestrator/routing model context window. Default is 8192." system_prompt: type: string prompt_targets: diff --git a/crates/brightstaff/src/handlers/agents/selector.rs b/crates/brightstaff/src/handlers/agents/selector.rs index 8225a003..e0467163 100644 --- a/crates/brightstaff/src/handlers/agents/selector.rs +++ b/crates/brightstaff/src/handlers/agents/selector.rs @@ -177,6 +177,7 @@ mod tests { "http://localhost:8080".to_string(), "test-model".to_string(), "plano-orchestrator".to_string(), + crate::router::orchestrator_model_v1::MAX_TOKEN_LEN, )) } diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs index b4166baa..c826dc50 100644 --- a/crates/brightstaff/src/handlers/integration_tests.rs +++ b/crates/brightstaff/src/handlers/integration_tests.rs @@ -23,6 +23,7 @@ mod tests { "http://localhost:8080".to_string(), "test-model".to_string(), "plano-orchestrator".to_string(), + crate::router::orchestrator_model_v1::MAX_TOKEN_LEN, )) } diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index aa910d5f..40ac429d 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -303,6 +303,10 @@ async fn init_app_state( .map(|p| p.name.clone()) .unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string()); + let orchestrator_max_tokens = overrides + .orchestrator_model_context_length + .unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN); + let orchestrator_service = Arc::new(OrchestratorService::with_routing( format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"), orchestrator_model_name, @@ -312,6 +316,7 @@ async fn init_app_state( session_ttl_seconds, session_cache, session_tenant_header, + orchestrator_max_tokens, )); let state_storage = init_state_storage(config).await?; diff --git a/crates/brightstaff/src/router/orchestrator.rs b/crates/brightstaff/src/router/orchestrator.rs index 9fe8515f..7aaf70a2 100644 --- a/crates/brightstaff/src/router/orchestrator.rs +++ b/crates/brightstaff/src/router/orchestrator.rs @@ -50,11 +50,12 @@ impl OrchestratorService { orchestrator_url: String, orchestration_model_name: String, orchestrator_provider_name: String, + max_token_length: usize, ) -> Self { let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new( HashMap::new(), orchestration_model_name, - orchestrator_model_v1::MAX_TOKEN_LEN, + max_token_length, )); OrchestratorService { @@ -80,6 +81,7 @@ impl OrchestratorService { session_ttl_seconds: Option, session_cache: Arc, tenant_header: Option, + max_token_length: usize, ) -> Self { let top_level_preferences: HashMap = top_level_prefs .map_or_else(HashMap::new, |prefs| { @@ -89,7 +91,7 @@ impl OrchestratorService { let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new( HashMap::new(), orchestration_model_name, - orchestrator_model_v1::MAX_TOKEN_LEN, + max_token_length, )); let session_ttl = @@ -333,6 +335,7 @@ mod tests { Some(ttl_seconds), session_cache, None, + orchestrator_model_v1::MAX_TOKEN_LEN, ) } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 7652af65..125a986d 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -233,6 +233,7 @@ pub struct Overrides { pub use_agent_orchestrator: Option, pub llm_routing_model: Option, pub agent_orchestration_model: Option, + pub orchestrator_model_context_length: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)]