mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
add request timeout
This commit is contained in:
parent
2fe91932d3
commit
d76ced3aff
3 changed files with 20 additions and 7 deletions
|
|
@ -4,6 +4,9 @@ pub const USER_ROLE: &str = "user";
|
||||||
pub const TOOL_ROLE: &str = "tool";
|
pub const TOOL_ROLE: &str = "tool";
|
||||||
pub const ASSISTANT_ROLE: &str = "assistant";
|
pub const ASSISTANT_ROLE: &str = "assistant";
|
||||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||||
|
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||||
|
pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||||
|
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||||
pub const MESSAGES_KEY: &str = "messages";
|
pub const MESSAGES_KEY: &str = "messages";
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,8 @@ use common::{
|
||||||
consts::{
|
consts::{
|
||||||
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
|
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
|
||||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||||
MODEL_SERVER_NAME, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
MODEL_SERVER_NAME, MODEL_SERVER_REQUEST_TIMEOUT_MS, REQUEST_ID_HEADER, TOOL_ROLE,
|
||||||
|
TRACE_PARENT_HEADER, USER_ROLE,
|
||||||
},
|
},
|
||||||
errors::ServerError,
|
errors::ServerError,
|
||||||
http::{CallArgs, Client},
|
http::{CallArgs, Client},
|
||||||
|
|
@ -144,7 +145,10 @@ impl HttpContext for StreamContext {
|
||||||
if metadata.is_none() {
|
if metadata.is_none() {
|
||||||
metadata = Some(HashMap::new());
|
metadata = Some(HashMap::new());
|
||||||
}
|
}
|
||||||
metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string());
|
metadata
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.insert("optimize_context_window".to_string(), "true".to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -170,12 +174,15 @@ impl HttpContext for StreamContext {
|
||||||
debug!("sending request to model server");
|
debug!("sending request to model server");
|
||||||
trace!("request body: {}", json_data);
|
trace!("request body: {}", json_data);
|
||||||
|
|
||||||
|
let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string();
|
||||||
|
|
||||||
let mut headers = vec![
|
let mut headers = vec![
|
||||||
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
|
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
|
||||||
(":method", "POST"),
|
(":method", "POST"),
|
||||||
(":path", "/function_calling"),
|
(":path", "/function_calling"),
|
||||||
("content-type", "application/json"),
|
("content-type", "application/json"),
|
||||||
(":authority", MODEL_SERVER_NAME),
|
(":authority", MODEL_SERVER_NAME),
|
||||||
|
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
|
||||||
];
|
];
|
||||||
|
|
||||||
if self.request_id.is_some() {
|
if self.request_id.is_some() {
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@ use common::api::open_ai::{
|
||||||
};
|
};
|
||||||
use common::configuration::{Overrides, PromptTarget, Tracing};
|
use common::configuration::{Overrides, PromptTarget, Tracing};
|
||||||
use common::consts::{
|
use common::consts::{
|
||||||
ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME,
|
API_REQUEST_TIMEOUT_MS, ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME,
|
||||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE,
|
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_TARGET_REQUEST_TIMEOUT_MS, MESSAGES_KEY,
|
||||||
TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
||||||
};
|
};
|
||||||
use common::errors::ServerError;
|
use common::errors::ServerError;
|
||||||
use common::http::{CallArgs, Client};
|
use common::http::{CallArgs, Client};
|
||||||
|
|
@ -89,7 +89,7 @@ impl StreamContext {
|
||||||
streaming_response: false,
|
streaming_response: false,
|
||||||
user_prompt: None,
|
user_prompt: None,
|
||||||
is_chat_completions_request: false,
|
is_chat_completions_request: false,
|
||||||
overrides: overrides,
|
overrides,
|
||||||
request_id: None,
|
request_id: None,
|
||||||
traceparent: None,
|
traceparent: None,
|
||||||
_tracing: tracing,
|
_tracing: tracing,
|
||||||
|
|
@ -160,7 +160,7 @@ impl StreamContext {
|
||||||
callout_context.request_body.messages.clone(),
|
callout_context.request_body.messages.clone(),
|
||||||
);
|
);
|
||||||
let arch_messages_json = serde_json::to_string(¶ms).unwrap();
|
let arch_messages_json = serde_json::to_string(¶ms).unwrap();
|
||||||
let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string();
|
let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string();
|
||||||
|
|
||||||
let mut headers = vec![
|
let mut headers = vec![
|
||||||
(":method", "POST"),
|
(":method", "POST"),
|
||||||
|
|
@ -302,6 +302,8 @@ impl StreamContext {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
|
||||||
|
|
||||||
let http_method_str = http_method.to_string();
|
let http_method_str = http_method.to_string();
|
||||||
let mut headers: HashMap<_, _> = [
|
let mut headers: HashMap<_, _> = [
|
||||||
(ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
|
(ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
|
||||||
|
|
@ -310,6 +312,7 @@ impl StreamContext {
|
||||||
(":authority", endpoint_details.name.as_str()),
|
(":authority", endpoint_details.name.as_str()),
|
||||||
("content-type", "application/json"),
|
("content-type", "application/json"),
|
||||||
("x-envoy-max-retries", "3"),
|
("x-envoy-max-retries", "3"),
|
||||||
|
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
|
||||||
]
|
]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.collect();
|
.collect();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue