mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
add configurable orchestrator_model_context_length override (default 8192)
Made-with: Cursor
This commit is contained in:
parent
aff9164c57
commit
fba744d2b1
6 changed files with 16 additions and 2 deletions
|
|
@ -288,6 +288,9 @@ properties:
|
||||||
agent_orchestration_model:
|
agent_orchestration_model:
|
||||||
type: string
|
type: string
|
||||||
description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
|
description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
|
||||||
|
orchestrator_model_context_length:
|
||||||
|
type: integer
|
||||||
|
description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
|
||||||
system_prompt:
|
system_prompt:
|
||||||
type: string
|
type: string
|
||||||
prompt_targets:
|
prompt_targets:
|
||||||
|
|
|
||||||
|
|
@ -177,6 +177,7 @@ mod tests {
|
||||||
"http://localhost:8080".to_string(),
|
"http://localhost:8080".to_string(),
|
||||||
"test-model".to_string(),
|
"test-model".to_string(),
|
||||||
"plano-orchestrator".to_string(),
|
"plano-orchestrator".to_string(),
|
||||||
|
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ mod tests {
|
||||||
"http://localhost:8080".to_string(),
|
"http://localhost:8080".to_string(),
|
||||||
"test-model".to_string(),
|
"test-model".to_string(),
|
||||||
"plano-orchestrator".to_string(),
|
"plano-orchestrator".to_string(),
|
||||||
|
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -303,6 +303,10 @@ async fn init_app_state(
|
||||||
.map(|p| p.name.clone())
|
.map(|p| p.name.clone())
|
||||||
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
|
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
|
||||||
|
|
||||||
|
let orchestrator_max_tokens = overrides
|
||||||
|
.orchestrator_model_context_length
|
||||||
|
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
|
||||||
|
|
||||||
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||||
orchestrator_model_name,
|
orchestrator_model_name,
|
||||||
|
|
@ -312,6 +316,7 @@ async fn init_app_state(
|
||||||
session_ttl_seconds,
|
session_ttl_seconds,
|
||||||
session_cache,
|
session_cache,
|
||||||
session_tenant_header,
|
session_tenant_header,
|
||||||
|
orchestrator_max_tokens,
|
||||||
));
|
));
|
||||||
|
|
||||||
let state_storage = init_state_storage(config).await?;
|
let state_storage = init_state_storage(config).await?;
|
||||||
|
|
|
||||||
|
|
@ -50,11 +50,12 @@ impl OrchestratorService {
|
||||||
orchestrator_url: String,
|
orchestrator_url: String,
|
||||||
orchestration_model_name: String,
|
orchestration_model_name: String,
|
||||||
orchestrator_provider_name: String,
|
orchestrator_provider_name: String,
|
||||||
|
max_token_length: usize,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||||
HashMap::new(),
|
HashMap::new(),
|
||||||
orchestration_model_name,
|
orchestration_model_name,
|
||||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
max_token_length,
|
||||||
));
|
));
|
||||||
|
|
||||||
OrchestratorService {
|
OrchestratorService {
|
||||||
|
|
@ -80,6 +81,7 @@ impl OrchestratorService {
|
||||||
session_ttl_seconds: Option<u64>,
|
session_ttl_seconds: Option<u64>,
|
||||||
session_cache: Arc<dyn SessionCache>,
|
session_cache: Arc<dyn SessionCache>,
|
||||||
tenant_header: Option<String>,
|
tenant_header: Option<String>,
|
||||||
|
max_token_length: usize,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
|
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
|
||||||
.map_or_else(HashMap::new, |prefs| {
|
.map_or_else(HashMap::new, |prefs| {
|
||||||
|
|
@ -89,7 +91,7 @@ impl OrchestratorService {
|
||||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||||
HashMap::new(),
|
HashMap::new(),
|
||||||
orchestration_model_name,
|
orchestration_model_name,
|
||||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
max_token_length,
|
||||||
));
|
));
|
||||||
|
|
||||||
let session_ttl =
|
let session_ttl =
|
||||||
|
|
@ -333,6 +335,7 @@ mod tests {
|
||||||
Some(ttl_seconds),
|
Some(ttl_seconds),
|
||||||
session_cache,
|
session_cache,
|
||||||
None,
|
None,
|
||||||
|
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -233,6 +233,7 @@ pub struct Overrides {
|
||||||
pub use_agent_orchestrator: Option<bool>,
|
pub use_agent_orchestrator: Option<bool>,
|
||||||
pub llm_routing_model: Option<String>,
|
pub llm_routing_model: Option<String>,
|
||||||
pub agent_orchestration_model: Option<String>,
|
pub agent_orchestration_model: Option<String>,
|
||||||
|
pub orchestrator_model_context_length: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue