2024-07-29 12:15:26 -07:00
use http ::StatusCode ;
2024-09-04 17:28:12 -07:00
use proxy_wasm_test_framework ::tester ::{ self , Tester } ;
use proxy_wasm_test_framework ::types ::{
Action , BufferType , LogLevel , MapType , MetricType , ReturnType ,
} ;
2024-07-29 12:15:26 -07:00
use serial_test ::serial ;
2024-07-25 14:41:36 -07:00
use std ::path ::Path ;
/// Returns the relative path of the compiled `llm_gateway` wasm module.
///
/// # Panics
///
/// Panics with a build hint when the release artifact does not exist at the
/// expected location.
fn wasm_module() -> String {
    // Single source of truth for the artifact location; being a UTF-8 literal,
    // it can be returned directly without a `Path` -> `str` round trip.
    const WASM_PATH: &str = "../target/wasm32-wasi/release/llm_gateway.wasm";

    assert!(
        Path::new(WASM_PATH).exists(),
        "Run `cargo build --release --target=wasm32-wasi` first"
    );

    WASM_PATH.to_string()
}
2024-09-26 00:15:17 -06:00
fn request_headers_expectations ( module : & mut Tester , http_context : i32 ) {
2024-09-04 17:28:12 -07:00
module
. call_proxy_on_request_headers ( http_context , 0 , false )
2024-09-26 00:15:17 -06:00
. expect_get_header_map_value (
Some ( MapType ::HttpRequestHeaders ) ,
2024-10-03 10:57:01 -07:00
Some ( " x-arch-llm-provider-hint " ) ,
2024-09-26 00:15:17 -06:00
)
2024-10-03 10:57:01 -07:00
. returning ( Some ( " default " ) )
2024-10-09 15:47:32 -07:00
. expect_log ( Some ( LogLevel ::Debug ) , None )
2024-09-26 00:15:17 -06:00
. expect_add_header_map_value (
Some ( MapType ::HttpRequestHeaders ) ,
2024-09-27 16:41:39 -07:00
Some ( " x-arch-llm-provider " ) ,
2024-10-03 10:57:01 -07:00
Some ( " open-ai-gpt-4 " ) ,
2024-09-26 00:15:17 -06:00
)
2024-09-17 16:12:41 -07:00
. expect_replace_header_map_value (
2024-09-04 17:28:12 -07:00
Some ( MapType ::HttpRequestHeaders ) ,
2024-09-26 00:15:17 -06:00
Some ( " Authorization " ) ,
2024-10-03 10:57:01 -07:00
Some ( " Bearer secret_key " ) ,
2024-09-04 17:28:12 -07:00
)
2024-09-26 00:15:17 -06:00
. expect_remove_header_map_value ( Some ( MapType ::HttpRequestHeaders ) , Some ( " content-length " ) )
2024-09-04 17:28:12 -07:00
. expect_get_header_map_value (
Some ( MapType ::HttpRequestHeaders ) ,
2024-09-27 16:41:39 -07:00
Some ( " x-arch-ratelimit-selector " ) ,
2024-09-04 17:28:12 -07:00
)
. returning ( Some ( " selector-key " ) )
. expect_get_header_map_value ( Some ( MapType ::HttpRequestHeaders ) , Some ( " selector-key " ) )
. returning ( Some ( " selector-value " ) )
2024-09-26 00:15:17 -06:00
. expect_get_header_map_pairs ( Some ( MapType ::HttpRequestHeaders ) )
. returning ( None )
2024-10-04 19:19:44 -07:00
. expect_get_header_map_value ( Some ( MapType ::HttpRequestHeaders ) , Some ( " :path " ) )
. returning ( Some ( " /v1/chat/completions " ) )
2024-10-17 10:16:40 -07:00
. expect_get_header_map_pairs ( Some ( MapType ::HttpRequestHeaders ) )
. returning ( None )
2024-09-26 00:15:17 -06:00
. expect_log ( Some ( LogLevel ::Debug ) , None )
2024-10-08 16:24:08 -07:00
. expect_get_header_map_value ( Some ( MapType ::HttpRequestHeaders ) , Some ( " x-request-id " ) )
. returning ( None )
2024-09-04 17:28:12 -07:00
. execute_and_expect ( ReturnType ::Action ( Action ::Continue ) )
. unwrap ( ) ;
2024-09-26 00:15:17 -06:00
}
/// Creates the HTTP stream context and replays the standard request-header
/// exchange.
///
/// First calls `proxy_on_context_create(http_context, filter_context)`,
/// expecting one debug log line and no return value, then delegates to
/// `request_headers_expectations` for the same `http_context`.
///
/// # Panics
///
/// Panics (via `unwrap`) if the module deviates from the expectations.
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
    module
        .call_proxy_on_context_create(http_context, filter_context)
        .expect_log(Some(LogLevel::Debug), None)
        .execute_and_expect(ReturnType::None)
        .unwrap();

    request_headers_expectations(module, http_context);
}
2024-10-04 19:33:52 -07:00
fn setup_filter ( module : & mut Tester , config : & str ) -> i32 {
let filter_context = 1 ;
module
. call_proxy_on_context_create ( filter_context , 0 )
. expect_metric_creation ( MetricType ::Gauge , " active_http_calls " )
. expect_metric_creation ( MetricType ::Counter , " ratelimited_rq " )
. execute_and_expect ( ReturnType ::None )
. unwrap ( ) ;
module
. call_proxy_on_configure ( filter_context , config . len ( ) as i32 )
. expect_get_buffer_bytes ( Some ( BufferType ::PluginConfiguration ) )
2024-10-05 19:25:16 -07:00
. returning ( Some ( config ) )
2024-10-04 19:33:52 -07:00
. execute_and_expect ( ReturnType ::Bool ( true ) )
. unwrap ( ) ;
filter_context
}
/// Returns the YAML plugin configuration shared by all tests in this file.
///
/// It declares two OpenAI providers (`open-ai-gpt-4`, marked as default, and
/// `open-ai-gpt-4o`), one `weather_forecast` prompt target, and a rate limit
/// of 50 tokens per minute for model `gpt-4` keyed on the
/// `selector-key: selector-value` header.
fn default_config() -> &'static str {
    r#"
version: "0.1-beta"

listener:
  address: 0.0.0.0
  port: 10000
  message_format: huggingface
  connect_timeout: 0.005s

endpoints:
  api_server:
    endpoint: api_server:80
    connect_timeout: 0.005s

llm_providers:
  - name: open-ai-gpt-4
    provider: openai
    access_key: secret_key
    model: gpt-4
    default: true
  - name: open-ai-gpt-4o
    provider: openai
    access_key: secret_key
    model: gpt-4o

overrides:
  # confidence threshold for prompt target intent matching
  prompt_target_intent_matching_threshold: 0.6

system_prompt: |
  You are a helpful assistant.

prompt_guards:
  input_guards:
    jailbreak:
      on_exception:
        message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."

prompt_targets:
  - name: weather_forecast
    description: This function provides realtime weather forecast information for a given city.
    parameters:
      - name: city
        required: true
        description: The city for which the weather forecast is requested.
      - name: days
        description: The number of days for which the weather forecast is requested.
      - name: units
        description: The units in which the weather forecast is requested.
    endpoint:
      name: api_server
      path: /weather
    system_prompt: |
      You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
      - Use farenheight for temperature
      - Use miles per hour for wind speed

ratelimits:
  - model: gpt-4
    selector:
      key: selector-key
      value: selector-value
    limit:
      tokens: 50
      unit: minute
"#
}
2024-07-25 14:41:36 -07:00
/// A well-formed chat-completions request with a short user prompt is
/// rewritten in place and allowed to continue.
#[test]
#[serial]
fn successful_request_to_open_ai_chat_completions() {
    let args = tester::MockSettings {
        wasm_path: wasm_module(),
        quiet: false,
        allow_unexpected: false,
    };
    let mut module = tester::mock(args).unwrap();

    module
        .call_start()
        .execute_and_expect(ReturnType::None)
        .unwrap();

    // Setup Filter
    let filter_context = setup_filter(&mut module, default_config());

    // Setup HTTP Stream: context creation followed by the standard
    // request-header exchange.
    let http_context = 2;
    normal_flow(&mut module, filter_context, http_context);

    // Request Body
    let chat_completions_request_body = "\
{\
    \"messages\": [\
    {\
        \"role\": \"system\",\
        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
    },\
    {\
        \"role\": \"user\",\
        \"content\": \"Compose a poem.\"\
    }\
    ],\
    \"model\": \"gpt-4\"\
}";

    module
        .call_proxy_on_request_body(
            http_context,
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        // The body is written back; its exact content is not asserted.
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
}
2024-07-29 12:15:26 -07:00
/// A request body that cannot be accepted is answered locally with
/// `400 Bad Request` and the stream is paused.
#[test]
#[serial]
fn bad_request_to_open_ai_chat_completions() {
    let args = tester::MockSettings {
        wasm_path: wasm_module(),
        quiet: false,
        allow_unexpected: false,
    };
    let mut module = tester::mock(args).unwrap();

    module
        .call_start()
        .execute_and_expect(ReturnType::None)
        .unwrap();

    // Setup Filter
    let filter_context = setup_filter(&mut module, default_config());

    // Setup HTTP Stream: context creation followed by the standard
    // request-header exchange.
    let http_context = 2;
    normal_flow(&mut module, filter_context, http_context);

    // Request Body: the system message has a dangling comma and no
    // `content`, and no `model` is given.
    let incomplete_chat_completions_request_body = "\
{\
    \"messages\": [\
    {\
        \"role\": \"system\",\
    },\
    {\
        \"role\": \"user\",\
        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
    }\
    ]\
}";

    module
        .call_proxy_on_request_body(
            http_context,
            incomplete_chat_completions_request_body.len() as i32,
            true,
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
        .expect_log(Some(LogLevel::Debug), None)
        // Only the status code of the local response is asserted.
        .expect_send_local_response(
            Some(StatusCode::BAD_REQUEST.as_u16().into()),
            None,
            None,
            None,
        )
        .execute_and_expect(ReturnType::Action(Action::Pause))
        .unwrap();
}
2024-09-04 15:31:05 -07:00
/// A request with a long user prompt is answered locally with
/// `429 Too Many Requests` and bumps the `ratelimited_rq` counter.
#[test]
#[serial]
fn request_ratelimited() {
    let args = tester::MockSettings {
        wasm_path: wasm_module(),
        quiet: false,
        allow_unexpected: false,
    };
    let mut module = tester::mock(args).unwrap();

    module
        .call_start()
        .execute_and_expect(ReturnType::None)
        .unwrap();

    // Setup Filter
    let filter_context = setup_filter(&mut module, default_config());

    // Setup HTTP Stream
    let http_context = 2;
    normal_flow(&mut module, filter_context, http_context);

    // Request Body: the user prompt repeats the same sentence three times.
    let chat_completions_request_body = "\
{\
    \"messages\": [\
    {\
        \"role\": \"system\",\
        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
    },\
    {\
        \"role\": \"user\",\
        \"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming.\"\
    }\
    ],\
    \"model\": \"gpt-4\"\
}";

    module
        .call_proxy_on_request_body(
            http_context,
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // Three debug log lines precede the local response.
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        // Only the status code of the local response is asserted.
        .expect_send_local_response(
            Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
            None,
            None,
            None,
        )
        .expect_metric_increment("ratelimited_rq", 1)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
}
/// Sends a chat-completions request with a single-sentence user prompt and
/// checks the host calls made while the body is processed.
///
/// NOTE(review): despite the test name and the "shorter body" intent, the
/// expectations below are those of a rate-limited request (a 429 local
/// response plus a `ratelimited_rq` increment), identical to
/// `request_ratelimited`. Confirm whether the 50-token limit in
/// `default_config` is meant to be exceeded by this body, or whether the
/// expectations should instead be a body rewrite followed by `Continue`.
#[test]
#[serial]
fn request_not_ratelimited() {
    let args = tester::MockSettings {
        wasm_path: wasm_module(),
        quiet: false,
        allow_unexpected: false,
    };
    let mut module = tester::mock(args).unwrap();

    module
        .call_start()
        .execute_and_expect(ReturnType::None)
        .unwrap();

    // Setup Filter
    let filter_context = setup_filter(&mut module, default_config());

    // Setup HTTP Stream
    let http_context = 2;
    normal_flow(&mut module, filter_context, http_context);

    // give shorter body to avoid rate limiting
    let chat_completions_request_body = "\
{\
    \"messages\": [\
    {\
        \"role\": \"system\",\
        \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
    },\
    {\
        \"role\": \"user\",\
        \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
    }\
    ],\
    \"model\": \"gpt-4\"\
}";

    module
        .call_proxy_on_request_body(
            http_context,
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // Three debug log lines precede the local response.
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        // Only the status code of the local response is asserted.
        .expect_send_local_response(
            Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
            None,
            None,
            None,
        )
        .expect_metric_increment("ratelimited_rq", 1)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
}