mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
add configurable orchestrator_model_context_length override (default 8192)
Made-with: Cursor
This commit is contained in:
parent
aff9164c57
commit
fba744d2b1
6 changed files with 16 additions and 2 deletions
|
|
@ -288,6 +288,9 @@ properties:
|
|||
agent_orchestration_model:
|
||||
type: string
|
||||
description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
|
||||
orchestrator_model_context_length:
|
||||
type: integer
|
||||
description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
|
||||
system_prompt:
|
||||
type: string
|
||||
prompt_targets:
|
||||
|
|
|
|||
|
|
@ -177,6 +177,7 @@ mod tests {
|
|||
"http://localhost:8080".to_string(),
|
||||
"test-model".to_string(),
|
||||
"plano-orchestrator".to_string(),
|
||||
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ mod tests {
|
|||
"http://localhost:8080".to_string(),
|
||||
"test-model".to_string(),
|
||||
"plano-orchestrator".to_string(),
|
||||
crate::router::orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -303,6 +303,10 @@ async fn init_app_state(
|
|||
.map(|p| p.name.clone())
|
||||
.unwrap_or_else(|| DEFAULT_ORCHESTRATOR_LLM_PROVIDER.to_string());
|
||||
|
||||
let orchestrator_max_tokens = overrides
|
||||
.orchestrator_model_context_length
|
||||
.unwrap_or(brightstaff::router::orchestrator_model_v1::MAX_TOKEN_LEN);
|
||||
|
||||
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||
format!("{llm_provider_url}{CHAT_COMPLETIONS_PATH}"),
|
||||
orchestrator_model_name,
|
||||
|
|
@ -312,6 +316,7 @@ async fn init_app_state(
|
|||
session_ttl_seconds,
|
||||
session_cache,
|
||||
session_tenant_header,
|
||||
orchestrator_max_tokens,
|
||||
));
|
||||
|
||||
let state_storage = init_state_storage(config).await?;
|
||||
|
|
|
|||
|
|
@ -50,11 +50,12 @@ impl OrchestratorService {
|
|||
orchestrator_url: String,
|
||||
orchestration_model_name: String,
|
||||
orchestrator_provider_name: String,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||
HashMap::new(),
|
||||
orchestration_model_name,
|
||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
max_token_length,
|
||||
));
|
||||
|
||||
OrchestratorService {
|
||||
|
|
@ -80,6 +81,7 @@ impl OrchestratorService {
|
|||
session_ttl_seconds: Option<u64>,
|
||||
session_cache: Arc<dyn SessionCache>,
|
||||
tenant_header: Option<String>,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
|
||||
.map_or_else(HashMap::new, |prefs| {
|
||||
|
|
@ -89,7 +91,7 @@ impl OrchestratorService {
|
|||
let orchestrator_model = Arc::new(orchestrator_model_v1::OrchestratorModelV1::new(
|
||||
HashMap::new(),
|
||||
orchestration_model_name,
|
||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
max_token_length,
|
||||
));
|
||||
|
||||
let session_ttl =
|
||||
|
|
@ -333,6 +335,7 @@ mod tests {
|
|||
Some(ttl_seconds),
|
||||
session_cache,
|
||||
None,
|
||||
orchestrator_model_v1::MAX_TOKEN_LEN,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -233,6 +233,7 @@ pub struct Overrides {
|
|||
pub use_agent_orchestrator: Option<bool>,
|
||||
pub llm_routing_model: Option<String>,
|
||||
pub agent_orchestration_model: Option<String>,
|
||||
pub orchestrator_model_context_length: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue