Add configurable `orchestrator_model_context_length` override (default: 8192 tokens)

Made-with: Cursor
This commit is contained in:
Adil Hafeez 2026-04-15 15:48:59 -07:00
parent aff9164c57
commit fba744d2b1
6 changed files with 16 additions and 2 deletions

View file

@ -288,6 +288,9 @@ properties:
agent_orchestration_model:
type: string
description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
orchestrator_model_context_length:
type: integer
description: "Maximum context length, in tokens, for the orchestrator/routing model. Defaults to 8192 when omitted."
system_prompt:
type: string
prompt_targets:

View file

@ -177,6 +177,7 @@ mod tests {
"http://localhost:8080".to_string(),
"test-model".to_string(),
"plano-orchestrator".to_string(),
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
))
}

View file

@ -23,6 +23,7 @@ mod tests {
"http://localhost:8080".to_string(),
"test-model".to_string(),
"plano-orchestrator".to_string(),
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
))
}

View file

@ -303,6 +303,10 @@ async fn init_app_state(
.map(|p| p.name.clone())
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
let orchestrator_max_tokens = overrides
.orchestrator_model_context_length
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
orchestrator_model_name,
@ -312,6 +316,7 @@ async fn init_app_state(
session_ttl_seconds,
session_cache,
session_tenant_header,
orchestrator_max_tokens,
));
let state_storage = init_state_storage(config).await?;

View file

@ -50,11 +50,12 @@ impl OrchestratorService {
orchestrator_url: String,
orchestration_model_name: String,
orchestrator_provider_name: String,
max_token_length: usize,
) -> Self {
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
HashMap::new(),
orchestration_model_name,
orchestrator_model_v1::MAX_TOKEN_LEN,
max_token_length,
));
OrchestratorService {
@ -80,6 +81,7 @@ impl OrchestratorService {
session_ttl_seconds: Option<u64>,
session_cache: Arc<dyn SessionCache>,
tenant_header: Option<String>,
max_token_length: usize,
) -> Self {
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
.map_or_else(HashMap::new, |prefs| {
@ -89,7 +91,7 @@ impl OrchestratorService {
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
HashMap::new(),
orchestration_model_name,
orchestrator_model_v1::MAX_TOKEN_LEN,
max_token_length,
));
let session_ttl =
@ -333,6 +335,7 @@ mod tests {
Some(ttl_seconds),
session_cache,
None,
orchestrator_model_v1::MAX_TOKEN_LEN,
)
}

View file

@ -233,6 +233,7 @@ pub struct Overrides {
pub use_agent_orchestrator: Option<bool>,
pub llm_routing_model: Option<String>,
pub agent_orchestration_model: Option<String>,
pub orchestrator_model_context_length: Option<usize>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]