Revert "Revert "Add support for multiple LLM Providers (#60)"" (#83)

* Revert "Revert "Add support for multiple LLM Providers (#60)""

This reverts commit 43d6bc80e9.

* wip

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>

* Revert "wip"

This reverts commit 7c4dde5d1f.

* fix parameter name

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>

* force use openai

---------

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
Co-authored-by: Adil Hafeez <adil@katanemo.com>
This commit is contained in:
José Ulises Niño Rivera 2024-09-26 00:15:17 -06:00 committed by GitHub
parent 370f3bb2c5
commit 9ea6bb0d73
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 457 additions and 127 deletions

View file

@@ -25,6 +25,52 @@ fn wasm_module() -> String {
wasm_file.to_str().unwrap().to_string()
}
// Replays the proxy-wasm request-headers phase for `http_context` and
// registers the ordered sequence of host calls the filter is expected to
// make. NOTE(review): the Tester presumably matches expectations in
// registration order, so reordering these calls would break the test —
// confirm against the test framework.
fn request_headers_expectations(module: &mut Tester, http_context: i32) {
module
.call_proxy_on_request_headers(http_context, 0, false)
// Filter reads the deterministic-provider flag; the test answers "true".
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-deterministic-provider"),
)
.returning(Some("true"))
// Provider header is pinned to "openai" — consistent with the
// "force use openai" change in this commit.
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-llm-provider"),
Some("openai"),
)
// Filter fetches the caller-supplied OpenAI API key...
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-openai-api-key"),
)
.returning(Some("api-key"))
// ...and rewrites it into a standard bearer Authorization header.
.expect_replace_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("Authorization"),
Some("Bearer api-key"),
)
// Both provider key headers are stripped before forwarding upstream,
// regardless of which provider was selected.
.expect_remove_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-openai-api-key"),
)
.expect_remove_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-mistral-api-key"),
)
// content-length is removed — presumably because the filter may rewrite
// the request body later; TODO confirm in the filter implementation.
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
// Rate-limit selector indirection: the selector header names a second
// header, and that header's value is the actual rate-limit key.
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-bolt-ratelimit-selector"),
)
.returning(Some("selector-key"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
.returning(Some("selector-value"))
// Filter enumerates all request headers; the test returns no pairs.
.expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders))
.returning(None)
.expect_log(Some(LogLevel::Debug), None)
// Headers phase must complete without pausing the request.
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
}
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
module
.call_proxy_on_context_create(http_context, filter_context)
@@ -32,28 +78,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.execute_and_expect(ReturnType::None)
.unwrap();
// Request Headers
module
.call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host"))
.returning(Some("api.openai.com"))
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/llmrouting"))
.expect_replace_header_map_value(
Some(MapType::HttpRequestHeaders),
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(Some("selector-key"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
.returning(Some("selector-value"))
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
request_headers_expectations(module, http_context);
// Request Body
let chat_completions_request_body = "\
@@ -82,8 +107,8 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
// The actual call is not important in this test, we just need to grab the token_id
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(1))
.expect_metric_increment("active_http_calls", 1)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_increment("active_http_calls", 1)
.expect_log(Some(LogLevel::Info), None)
.execute_and_expect(ReturnType::Action(Action::Pause))
.unwrap();
@@ -115,6 +140,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
.expect_http_call(Some("model_server"), None, None, None, None)
.returning(Some(2))
.expect_metric_increment("active_http_calls", 1)
.expect_log(Some(LogLevel::Debug), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@@ -235,26 +261,7 @@ fn successful_request_to_open_ai_chat_completions() {
.execute_and_expect(ReturnType::None)
.unwrap();
// Request Headers
module
.call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host"))
.returning(Some("api.openai.com"))
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/llmrouting"))
.expect_replace_header_map_value(
Some(MapType::HttpRequestHeaders),
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
request_headers_expectations(&mut module, http_context);
// Request Body
let chat_completions_request_body = "\
@@ -323,26 +330,7 @@ fn bad_request_to_open_ai_chat_completions() {
.execute_and_expect(ReturnType::None)
.unwrap();
// Request Headers
module
.call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host"))
.returning(Some("api.openai.com"))
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/llmrouting"))
.expect_replace_header_map_value(
Some(MapType::HttpRequestHeaders),
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
request_headers_expectations(&mut module, http_context);
// Request Body
let incomplete_chat_completions_request_body = "\