diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py index 171006f1..a4076660 100644 --- a/cli/planoai/utils.py +++ b/cli/planoai/utils.py @@ -92,7 +92,7 @@ def convert_legacy_listeners( "type": "model_listener", "port": 12000, "address": "0.0.0.0", - "timeout": "30s", + "timeout": "300s", "model_providers": model_providers or [], } @@ -101,7 +101,7 @@ def convert_legacy_listeners( "type": "prompt_listener", "port": 10000, "address": "0.0.0.0", - "timeout": "30s", + "timeout": "300s", } # Handle None case diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index b63cb824..4fa4c133 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -265,6 +265,10 @@ properties: type: boolean use_agent_orchestrator: type: boolean + upstream_timeout_ms: + type: integer + minimum: 1000 + description: "Timeout in milliseconds for outbound upstream calls from WASM filters (tool endpoints, function calling, default prompt targets). Default is 300000 (300s)." upstream_connect_timeout: type: string description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'." 
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index f4e2b7b4..3afcecbf 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -84,6 +84,7 @@ pub struct Overrides { pub prompt_target_intent_matching_threshold: Option, pub optimize_context_window: Option, pub use_agent_orchestrator: Option, + pub upstream_timeout_ms: Option<u64>, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index cafc8e80..81e28e86 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; pub const TOOL_ROLE: &str = "tool"; pub const ASSISTANT_ROLE: &str = "assistant"; -pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const API_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds pub const MODEL_SERVER_NAME: &str = "bright_staff"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index e3d00b3f..e33b2a4c 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -205,7 +205,12 @@ impl HttpContext for StreamContext { info!("on_http_request_body: sending request to model server"); debug!("request body: {}", json_data); - let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if let 
Some(overrides) = self.overrides.as_ref() { + overrides.upstream_timeout_ms.unwrap_or(MODEL_SERVER_REQUEST_TIMEOUT_MS) + } else { + MODEL_SERVER_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let mut headers = vec![ (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), @@ -230,7 +235,7 @@ impl HttpContext for StreamContext { headers, Some(json_data.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); if let Some(content) = self.user_prompt.as_ref().unwrap().content.as_ref() { diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 8ff44d52..173fcaf5 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -171,7 +171,14 @@ impl StreamContext { callout_context.request_body.messages.clone(), ); let arch_messages_json = serde_json::to_string(&params).unwrap(); - let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { + overrides + .upstream_timeout_ms + .unwrap_or(DEFAULT_TARGET_REQUEST_TIMEOUT_MS) + } else { + DEFAULT_TARGET_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let mut headers = vec![ (":method", "POST"), @@ -193,7 +200,7 @@ impl StreamContext { headers, Some(arch_messages_json.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); callout_context.response_handler_type = ResponseHandlerType::DefaultTarget; callout_context.prompt_target_name = Some(default_prompt_target.name.clone()); @@ -422,7 +429,12 @@ impl StreamContext { debug!("on_http_call_response: api call body {:?}", api_call_body); - let timeout_str = API_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { + overrides.upstream_timeout_ms.unwrap_or(API_REQUEST_TIMEOUT_MS) + } else { + API_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let 
http_method_str = http_method.to_string(); let mut headers: HashMap<_, _> = [ @@ -457,7 +469,7 @@ impl StreamContext { headers.into_iter().collect(), api_call_body.as_deref().map(|s| s.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); info!(