Merge branch 'main' into adil/add_acm_demo

This commit is contained in:
Adil Hafeez 2025-02-12 10:39:08 -08:00
commit ca766f81fa
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
130 changed files with 1716 additions and 5580 deletions

302
crates/Cargo.lock generated
View file

@ -234,6 +234,8 @@ dependencies = [
"serde_yaml",
"thiserror",
"tiktoken-rs",
"url",
"urlencoding",
]
[[package]]
@ -477,6 +479,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "duration-string"
version = "0.3.0"
@ -557,6 +570,15 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
@ -782,12 +804,151 @@ dependencies = [
"itoa",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "id-arena"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indexmap"
version = "2.6.0"
@ -883,6 +1044,12 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "litemap"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "llm_gateway"
version = "0.1.0"
@ -1028,6 +1195,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "percent-encoding"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project-lite"
version = "0.2.14"
@ -1547,6 +1720,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "target-lexicon"
version = "0.12.16"
@ -1606,6 +1790,16 @@ dependencies = [
"rustc-hash",
]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "toml"
version = "0.8.19"
@ -1676,6 +1870,35 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.11.0"
@ -2189,12 +2412,48 @@ dependencies = [
"wasmparser 0.212.0",
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
@ -2216,6 +2475,49 @@ dependencies = [
"syn 2.0.79",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
"synstructure",
]
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "zstd"
version = "0.13.2"

View file

@ -16,6 +16,8 @@ tiktoken-rs = "0.5.9"
rand = "0.8.5"
serde_json = "1.0"
hex = "0.4.3"
urlencoding = "2.1.3"
url = "2.5.4"
[dev-dependencies]
pretty_assertions = "1.4.1"

View file

@ -25,6 +25,7 @@ pub struct Configuration {
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Overrides {
pub prompt_target_intent_matching_threshold: Option<f64>,
pub optimize_context_window: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -242,6 +243,7 @@ pub struct EndpointDetails {
pub path: Option<String>,
#[serde(rename = "http_method")]
pub method: Option<HttpMethod>,
pub http_headers: Option<HashMap<String, String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View file

@ -1,21 +1,30 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use url::Url;
use urlencoding;
use crate::configuration::Parameter;
pub fn replace_params_in_path(
path: &str,
params: &HashMap<String, String>,
) -> Result<String, String> {
let mut result = String::new();
let mut in_param = false;
tool_params: &HashMap<String, String>,
prompt_target_params: &[Parameter],
) -> Result<(String, String, HashMap<String, String>), String> {
let mut query_string_replaced = String::new();
let mut current_param = String::new();
let mut vars_replaced = HashSet::new();
let mut params: HashMap<String, String> = HashMap::new();
let mut in_param = false;
for c in path.chars() {
if c == '{' {
in_param = true;
} else if c == '}' {
in_param = false;
let param_name = current_param.clone();
if let Some(value) = params.get(&param_name) {
result.push_str(value);
if let Some(value) = tool_params.get(&param_name) {
let value = urlencoding::encode(value);
query_string_replaced.push_str(value.into_owned().as_str());
vars_replaced.insert(param_name.clone());
} else {
return Err(format!("Missing value for parameter `{}`", param_name));
}
@ -23,31 +32,106 @@ pub fn replace_params_in_path(
} else if in_param {
current_param.push(c);
} else {
result.push(c);
query_string_replaced.push(c);
}
}
Ok(result)
// add the remaining params in path
for (param_name, value) in tool_params.iter() {
let value = urlencoding::encode(value).into_owned();
if !vars_replaced.contains(param_name) {
vars_replaced.insert(param_name.clone());
params.insert(param_name.clone(), value.clone());
if query_string_replaced.contains("?") {
query_string_replaced.push_str(&format!("&{}={}", param_name, value));
} else {
query_string_replaced.push_str(&format!("?{}={}", param_name, value));
}
}
}
// add default values
for param in prompt_target_params.iter() {
if !vars_replaced.contains(&param.name) && param.default.is_some() {
params.insert(param.name.clone(), param.default.clone().unwrap());
if query_string_replaced.contains("?") {
query_string_replaced.push_str(&format!(
"&{}={}",
param.name,
param.default.as_ref().unwrap()
));
} else {
query_string_replaced.push_str(&format!(
"?{}={}",
param.name,
param.default.as_ref().unwrap()
));
}
}
}
let parsed_uri = Url::parse("http://dummy.com").unwrap();
let parsed_uri = parsed_uri
.join(&query_string_replaced)
.map_err(|e| e.to_string())?;
let query_string = parsed_uri.query().unwrap_or("");
let path_uri = parsed_uri.path();
Ok((path_uri.to_string(), query_string.to_string(), params))
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use crate::configuration::Parameter;
#[test]
fn test_replace_path() {
let path = "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}";
let params = vec![("cluster_name".to_string(), "test1".to_string())]
.into_iter()
.collect();
let params = vec![
("cluster_name".to_string(), "test1".to_string()),
("hello".to_string(), "hello world".to_string()),
]
.into_iter()
.collect();
let prompt_target_params = vec![Parameter {
name: "country".to_string(),
parameter_type: None,
description: "test target".to_string(),
required: None,
enum_values: None,
default: Some("US".to_string()),
in_path: None,
format: None,
}];
let out_params: HashMap<String, String> = vec![
("country".to_string(), "US".to_string()),
("hello".to_string(), "hello%20world".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string(),
"hello=hello%20world&country=US".to_string(),
out_params.clone()
))
);
let out_params = HashMap::new();
let prompt_target_params = vec![];
let path = "/cluster.open-cluster-management.io/v1/managedclusters";
let params = vec![].into_iter().collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/cluster.open-cluster-management.io/v1/managedclusters".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/cluster.open-cluster-management.io/v1/managedclusters".to_string(),
"".to_string(),
out_params
))
);
let path = "/foo/{bar}/baz";
@ -55,8 +139,8 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/foo/qux/baz".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok(("/foo/qux/baz".to_string(), "".to_string(), HashMap::new()))
);
let path = "/foo/{bar}/baz/{qux}";
@ -67,8 +151,45 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/foo/qux/baz/quux".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}?hello=world";
let params = vec![
("bar".to_string(), "qux".to_string()),
("qux".to_string(), "quux".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"hello=world".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}?hello={hello}";
let params = vec![
("bar".to_string(), "qux".to_string()),
("qux".to_string(), "quux".to_string()),
("hello".to_string(), "hello world".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"hello=hello%20world".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}";
@ -76,7 +197,7 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
super::replace_params_in_path(path, &params, &prompt_target_params),
Err("Missing value for parameter `qux`".to_string())
);
}

File diff suppressed because it is too large Load diff

View file

@ -79,6 +79,11 @@ impl RootContext for FilterContext {
}
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
trace!(
"||| create_http_context called with context_id: {:?} |||",
context_id
);
Some(Box::new(StreamContext::new(
context_id,
Rc::clone(&self.metrics),

View file

@ -87,7 +87,7 @@ impl StreamContext {
));
debug!(
"llm provider hint: {:?}, selected llm: {}",
"request received: llm provider hint: {:?}, selected llm: {}",
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER),
self.llm_provider.as_ref().unwrap().name
);
@ -309,6 +309,12 @@ impl HttpContext for StreamContext {
}
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
trace!(
"on_http_response_headers [S={}] end_stream={}",
self.context_id,
_end_of_stream
);
self.set_property(
vec!["metadata", "filter_metadata", "llm_filter", "user_prompt"],
Some("hello world from filter".as_bytes()),
@ -318,6 +324,13 @@ impl HttpContext for StreamContext {
}
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
trace!(
"on_http_response_body [S={}] bytes={} end_stream={}",
self.context_id,
body_size,
end_of_stream
);
if !self.is_chat_completions_request {
debug!("non-chatcompletion request");
return Action::Continue;
@ -517,8 +530,11 @@ impl HttpContext for StreamContext {
let chat_completions_response: ChatCompletionsResponse =
match serde_json::from_str(body_utf8.as_str()) {
Ok(de) => de,
Err(_e) => {
debug!("invalid response: {}", body_utf8);
Err(err) => {
debug!(
"non chat-completion compliant response received err: {}, body: {}",
err, body_utf8
);
return Action::Continue;
}
};

View file

@ -22,12 +22,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider-hint"),
)
.returning(Some("default"))
.expect_log(
Some(LogLevel::Debug),
Some("llm provider hint: Some(Default)"),
)
.expect_log(Some(LogLevel::Debug), Some("selected llm: open-ai-gpt-4"))
.returning(None)
.expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: Some(\"default\"), selected llm: open-ai-gpt-4"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider"),
@ -38,7 +34,11 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some("Authorization"),
Some("Bearer secret_key"),
)
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider-hint"),
)
.returning(Some("default"))
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-ratelimit-selector"),
@ -50,7 +50,6 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
.returning(None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/v1/chat/completions"))
.expect_log(Some(LogLevel::Debug), None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
.returning(None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
@ -62,7 +61,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -187,7 +186,10 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(
Some(LogLevel::Trace),
Some("||| create_http_context called with context_id: 2 |||"),
)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -218,9 +220,9 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 21)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
@ -251,7 +253,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -339,9 +341,9 @@ fn llm_gateway_request_ratelimited() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 107)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
@ -405,9 +407,9 @@ fn llm_gateway_request_not_ratelimited() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)

File diff suppressed because it is too large Load diff

View file

@ -27,21 +27,34 @@ impl Context for StreamContext {
.get_http_call_response_body(0, body_size)
.unwrap_or_default();
let http_status = self
.get_http_call_response_header(":status")
.unwrap_or(StatusCode::OK.as_str().to_string());
if http_status != StatusCode::OK.as_str() {
let server_error = ServerError::Upstream {
host: callout_context.upstream_cluster.unwrap(),
path: callout_context.upstream_cluster_path.unwrap(),
status: http_status.clone(),
body: String::from_utf8(body).unwrap(),
};
warn!("filter received non 2xx code: {:?}", server_error);
return self.send_server_error(
server_error,
Some(StatusCode::from_str(http_status.as_str()).unwrap()),
);
if let Some(http_status) = self.get_http_call_response_header(":status") {
match StatusCode::from_str(http_status.as_str()) {
Ok(status_code) => {
if !status_code.is_success() {
let server_error = ServerError::Upstream {
host: callout_context.upstream_cluster.unwrap(),
path: callout_context.upstream_cluster_path.unwrap(),
status: http_status.clone(),
body: String::from_utf8(body).unwrap(),
};
warn!("received non 2xx code: {:?}", server_error);
return self.send_server_error(
server_error,
Some(StatusCode::from_str(http_status.as_str()).unwrap()),
);
}
}
Err(_) => {
// invalid status code (status code non numeric)
return self.send_server_error(
ServerError::LogicError(format!("invalid status code: {}", http_status)),
Some(StatusCode::from_str(http_status.as_str()).unwrap()),
);
}
}
} else {
// :status header not found
warn!("missing :status header");
}
#[cfg_attr(any(), rustfmt::skip)]

View file

@ -3,6 +3,7 @@ use crate::stream_context::StreamContext;
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
use common::http::Client;
use common::stats::Gauge;
use log::trace;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::cell::RefCell;
@ -83,6 +84,11 @@ impl RootContext for FilterContext {
}
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
trace!(
"||| create_http_context called with context_id: {:?} |||",
context_id
);
Some(Box::new(StreamContext::new(
context_id,
Rc::clone(&self.metrics),

View file

@ -78,10 +78,7 @@ impl HttpContext for StreamContext {
}
};
debug!(
"developer => archgw: {}",
String::from_utf8_lossy(&body_bytes)
);
trace!("request body: {}", String::from_utf8_lossy(&body_bytes));
// Deserialize body into spec.
// Currently OpenAI API.
@ -133,9 +130,23 @@ impl HttpContext for StreamContext {
.map(|(_, pt)| pt.into())
.collect();
let mut metadata = deserialized_body.metadata.clone();
if let Some(overrides) = self.overrides.as_ref() {
if overrides.optimize_context_window.unwrap_or_default() {
if metadata.is_none() {
metadata = Some(HashMap::new());
}
metadata
.as_mut()
.unwrap()
.insert("optimize_context_window".to_string(), "true".to_string());
}
}
let arch_fc_chat_completion_request = ChatCompletionsRequest {
messages: deserialized_body.messages.clone(),
metadata: deserialized_body.metadata.clone(),
metadata,
stream: deserialized_body.stream,
model: "--".to_string(),
stream_options: deserialized_body.stream_options.clone(),
@ -152,7 +163,8 @@ impl HttpContext for StreamContext {
}
};
debug!("archgw => archfc: {}", json_data);
debug!("sending request to model server");
trace!("request body: {}", json_data);
let mut headers = vec![
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),

View file

@ -7,6 +7,7 @@ mod filter_context;
mod http_context;
mod metrics;
mod stream_context;
mod tools;
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);

View file

@ -1,4 +1,5 @@
use crate::metrics::Metrics;
use crate::tools::compute_request_path_body;
use common::api::open_ai::{
to_server_events, ArchState, ChatCompletionStreamResponse, ChatCompletionsRequest,
ChatCompletionsResponse, Message, ModelServerResponse, ToolCall,
@ -14,9 +15,8 @@ use common::http::{CallArgs, Client};
use common::stats::Gauge;
use derivative::Derivative;
use http::StatusCode;
use log::{debug, warn};
use log::{debug, trace, warn};
use proxy_wasm::traits::*;
use serde_yaml::Value;
use std::cell::RefCell;
use std::collections::HashMap;
use std::rc::Rc;
@ -46,7 +46,7 @@ pub struct StreamCallContext {
pub struct StreamContext {
system_prompt: Rc<Option<String>>,
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
_overrides: Rc<Option<Overrides>>,
pub overrides: Rc<Option<Overrides>>,
pub metrics: Rc<Metrics>,
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
pub context_id: u32,
@ -89,7 +89,7 @@ impl StreamContext {
streaming_response: false,
user_prompt: None,
is_chat_completions_request: false,
_overrides: overrides,
overrides: overrides,
request_id: None,
traceparent: None,
_tracing: tracing,
@ -125,13 +125,14 @@ impl StreamContext {
mut callout_context: StreamCallContext,
) {
let body_str = String::from_utf8(body).unwrap();
debug!("archgw <= archfc response: {}", body_str);
debug!("model server response received");
trace!("response body: {}", body_str);
let model_server_response: ModelServerResponse = match serde_json::from_str(&body_str) {
Ok(arch_fc_response) => arch_fc_response,
Err(e) => {
warn!(
"error deserializing archfc response: {}, body: {}",
"error deserializing modelserver response: {}, body: {}",
e, body_str
);
return self.send_server_error(ServerError::Deserialization(e), None);
@ -141,7 +142,7 @@ impl StreamContext {
let arch_fc_response = match model_server_response {
ModelServerResponse::ChatCompletionsResponse(response) => response,
ModelServerResponse::ModelServerErrorResponse(response) => {
debug!("archgw <= archfc error response: {}", response.result);
debug!("archgw <= modelserver error response: {}", response.result);
if response.result == "No intent matched" {
if let Some(default_prompt_target) = self
.prompt_targets
@ -272,85 +273,76 @@ impl StreamContext {
fn schedule_api_call_request(&mut self, mut callout_context: StreamCallContext) {
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap();
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
let endpoint_path: String = endpoint_details
.path
.as_ref()
.unwrap_or(&String::from("/"))
.to_string();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
let http_method = endpoint_details.method.clone().unwrap_or_default();
let prompt_target_params = prompt_target.parameters.clone().unwrap_or_default();
let mut tool_params = self.tool_calls.as_ref().unwrap()[0]
.function
.arguments
.clone();
tool_params.insert(
String::from(MESSAGES_KEY),
serde_yaml::to_value(&callout_context.request_body.messages).unwrap(),
);
let tool_params_json_str = serde_json::to_string(&tool_params).unwrap();
let endpoint = prompt_target.endpoint.unwrap();
let path: String = endpoint.path.unwrap_or(String::from("/"));
// only add params that are of string, number and bool type
let url_params = tool_params
.iter()
.filter(|(_, value)| value.is_number() || value.is_string() || value.is_bool())
.map(|(key, value)| match value {
Value::Number(n) => (key.clone(), n.to_string()),
Value::String(s) => (key.clone(), s.clone()),
Value::Bool(b) => (key.clone(), b.to_string()),
Value::Null => todo!(),
Value::Sequence(_) => todo!(),
Value::Mapping(_) => todo!(),
Value::Tagged(_) => todo!(),
})
.collect::<HashMap<String, String>>();
let path = match common::path::replace_params_in_path(&path, &url_params) {
Ok(path) => path,
let (path, body) = match compute_request_path_body(
&endpoint_path,
tool_params,
&prompt_target_params,
&http_method,
) {
Ok((path, body)) => (path, body),
Err(e) => {
return self.send_server_error(
ServerError::BadRequest {
why: format!("error replacing params in path: {}", e),
why: format!("error computing api request path or body: {}", e),
},
Some(StatusCode::BAD_REQUEST),
);
}
};
let http_method = endpoint.method.unwrap_or_default().to_string();
let mut headers = vec![
(ARCH_UPSTREAM_HOST_HEADER, endpoint.name.as_str()),
(":method", &http_method),
let http_method_str = http_method.to_string();
let mut headers: HashMap<_, _> = [
(ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
(":method", &http_method_str),
(":path", &path),
(":authority", endpoint.name.as_str()),
(":authority", endpoint_details.name.as_str()),
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
];
]
.into_iter()
.collect();
if self.request_id.is_some() {
headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap()));
headers.insert(REQUEST_ID_HEADER, self.request_id.as_ref().unwrap());
}
if self.traceparent.is_some() {
headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap()));
headers.insert(TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap());
}
// override http headers that are set in the prompt target
let http_headers = endpoint_details.http_headers.clone().unwrap_or_default();
for (key, value) in http_headers.iter() {
headers.insert(key.as_str(), value.as_str());
}
let call_args = CallArgs::new(
ARCH_INTERNAL_CLUSTER_NAME,
&path,
headers,
Some(tool_params_json_str.as_bytes()),
headers.into_iter().collect(),
body.as_deref().map(|s| s.as_bytes()),
vec![],
Duration::from_secs(5),
);
debug!(
"archgw => api call, endpoint: {}{}, body: {}",
endpoint.name.as_str(),
path,
tool_params_json_str
"dispatching api call to developer endpoint: {}, path: {}, method: {}",
endpoint_details.name, path, http_method_str
);
callout_context.upstream_cluster = Some(endpoint.name.to_owned());
callout_context.upstream_cluster = Some(endpoint_details.name.to_owned());
callout_context.upstream_cluster_path = Some(path.to_owned());
callout_context.response_handler_type = ResponseHandlerType::FunctionCall;
@ -363,6 +355,10 @@ impl StreamContext {
let http_status = self
.get_http_call_response_header(":status")
.unwrap_or(StatusCode::OK.as_str().to_string());
debug!(
"developer api call response received: status code: {}",
http_status
);
if http_status != StatusCode::OK.as_str() {
warn!(
"api server responded with non 2xx status code: {}",
@ -379,8 +375,8 @@ impl StreamContext {
);
}
self.tool_call_response = Some(String::from_utf8(body).unwrap());
debug!(
"archgw <= api call response: {}",
trace!(
"response body: {}",
self.tool_call_response.as_ref().unwrap()
);
@ -430,7 +426,8 @@ impl StreamContext {
return self.send_server_error(ServerError::Serialization(e), None);
}
};
debug!("archgw => llm request: {}", llm_request_str);
debug!("sending request to upstream llm");
trace!("request body: {}", llm_request_str);
self.start_upstream_llm_request_time = SystemTime::now()
.duration_since(UNIX_EPOCH)

View file

@ -0,0 +1,157 @@
use common::configuration::{HttpMethod, Parameter};
use std::collections::HashMap;
use serde_yaml::Value;
// only add params that are of string, number and bool type
pub fn filter_tool_params(tool_params: &HashMap<String, Value>) -> HashMap<String, String> {
tool_params
.iter()
.filter(|(_, value)| value.is_number() || value.is_string() || value.is_bool())
.map(|(key, value)| match value {
Value::Number(n) => (key.clone(), n.to_string()),
Value::String(s) => (key.clone(), s.clone()),
Value::Bool(b) => (key.clone(), b.to_string()),
Value::Null => todo!(),
Value::Sequence(_) => todo!(),
Value::Mapping(_) => todo!(),
Value::Tagged(_) => todo!(),
})
.collect::<HashMap<String, String>>()
}
pub fn compute_request_path_body(
endpoint_path: &str,
tool_params: &HashMap<String, Value>,
prompt_target_params: &[Parameter],
http_method: &HttpMethod,
) -> Result<(String, Option<String>), String> {
let tool_url_params = filter_tool_params(tool_params);
let (path_with_params, query_string, additional_params) = common::path::replace_params_in_path(
endpoint_path,
&tool_url_params,
prompt_target_params,
)?;
let (path, body) = match http_method {
HttpMethod::Get => (format!("{}?{}", path_with_params, query_string), None),
HttpMethod::Post => {
let mut additional_params = additional_params;
if !query_string.is_empty() {
query_string.split("&").for_each(|param| {
let mut parts = param.split("=");
let key = parts.next().unwrap();
let value = parts.next().unwrap();
additional_params.insert(key.to_string(), value.to_string());
});
}
let body = serde_json::to_string(&additional_params).unwrap();
(path_with_params, Some(body))
}
};
Ok((path, body))
}
#[cfg(test)]
mod test {
use common::configuration::{HttpMethod, Parameter};
#[test]
fn test_compute_request_path_body() {
let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}";
let tool_params = serde_yaml::from_str(
r#"
cluster_name: test1
hello: hello world
"#,
)
.unwrap();
let prompt_target_params = vec![Parameter {
name: "country".to_string(),
parameter_type: None,
description: "test target".to_string(),
required: None,
enum_values: None,
default: Some("US".to_string()),
in_path: None,
format: None,
}];
let http_method = HttpMethod::Get;
let (path, body) = super::compute_request_path_body(
endpoint_path,
&tool_params,
&prompt_target_params,
&http_method,
)
.unwrap();
assert_eq!(
path,
"/cluster.open-cluster-management.io/v1/managedclusters/test1?hello=hello%20world&country=US"
);
assert_eq!(body, None);
}
#[test]
fn test_compute_request_path_body_empty_params() {
let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/";
let tool_params = serde_yaml::from_str(r#"{}"#).unwrap();
let prompt_target_params = vec![Parameter {
name: "country".to_string(),
parameter_type: None,
description: "test target".to_string(),
required: None,
enum_values: None,
default: Some("US".to_string()),
in_path: None,
format: None,
}];
let http_method = HttpMethod::Get;
let (path, body) = super::compute_request_path_body(
endpoint_path,
&tool_params,
&prompt_target_params,
&http_method,
)
.unwrap();
assert_eq!(
path,
"/cluster.open-cluster-management.io/v1/managedclusters/?country=US"
);
assert_eq!(body, None);
}
#[test]
fn test_compute_request_path_body_override_default_val() {
let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/";
let tool_params = serde_yaml::from_str(
r#"
country: UK
"#,
)
.unwrap();
let prompt_target_params = vec![Parameter {
name: "country".to_string(),
parameter_type: None,
description: "test target".to_string(),
required: None,
enum_values: None,
default: Some("US".to_string()),
in_path: None,
format: None,
}];
let http_method = HttpMethod::Get;
let (path, body) = super::compute_request_path_body(
endpoint_path,
&tool_params,
&prompt_target_params,
&http_method,
)
.unwrap();
assert_eq!(
path,
"/cluster.open-cluster-management.io/v1/managedclusters/?country=UK"
);
assert_eq!(body, None);
}
}

View file

@ -41,7 +41,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -87,8 +87,9 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
None,
)
.returning(Some(1))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::Action(Action::Pause))
@ -203,7 +204,7 @@ fn prompt_gateway_successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -234,8 +235,9 @@ fn prompt_gateway_successful_request_to_open_ai_chat_completions() {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_http_call(Some("arch_internal"), None, None, None, None)
.returning(Some(4))
@ -267,7 +269,7 @@ fn prompt_gateway_bad_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -302,7 +304,7 @@ fn prompt_gateway_bad_request_to_open_ai_chat_completions() {
None,
None,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::Action(Action::Pause))
.unwrap();
}
@ -363,29 +365,30 @@ fn prompt_gateway_request_to_llm_gateway() {
metadata: None,
};
let expected_body = "{\"city\":\"seattle\"}";
let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
module
.call_proxy_on_http_call_response(http_context, 1, 0, arch_fc_resp_str.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&arch_fc_resp_str))
.expect_log(Some(LogLevel::Warn), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_http_call(
Some("arch_internal"),
Some(vec![
("x-arch-upstream", "api_server"),
(":method", "POST"),
(":path", "/weather"),
(":authority", "api_server"),
("content-type", "application/json"),
("x-arch-upstream", "api_server"),
(":authority", "api_server"),
("x-envoy-max-retries", "3"),
(":path", "/weather"),
]),
None,
Some(expected_body),
None,
None,
)
@ -401,13 +404,12 @@ fn prompt_gateway_request_to_llm_gateway() {
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&body_text))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_get_header_map_value(Some(MapType::HttpCallResponseHeaders), Some(":status"))
.returning(Some("200"))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::None)
.unwrap();