Add Ratelimit on request tokens (#44)

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
This commit is contained in:
José Ulises Niño Rivera 2024-09-04 17:28:12 -07:00 committed by GitHub
parent d98517f240
commit dd48689aee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 698 additions and 200 deletions

384
envoyfilter/Cargo.lock generated
View file

@ -38,6 +38,15 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
@ -110,7 +119,7 @@ dependencies = [
"cfg-if 1.0.0",
"libc",
"miniz_oxide",
"object 0.36.2",
"object",
"rustc-demangle",
]
@ -127,14 +136,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bincode"
version = "1.3.3"
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"serde",
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -156,6 +171,17 @@ dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.16.0"
@ -211,6 +237,12 @@ dependencies = [
"vec_map",
]
[[package]]
name = "cobs"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -247,21 +279,32 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebf72ceaf38f7d41194d0cf6748214d8ef7389167fe09aad80f87646dbfa325b"
checksum = "305d51c180ebdc46ef61bc60c54ae6512db3bc9a05842a1f1e762e45977019ab"
dependencies = [
"cranelift-entity",
]
[[package]]
name = "cranelift-codegen"
version = "0.107.2"
name = "cranelift-bitset"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ee7fde5cd9173f00ce02c491ee9e306d64740f4b1a697946e0474f389999e13"
checksum = "e3247afacd9b13d620033f3190d9e49d1beefc1acb33d5604a249956c9c13709"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "cranelift-codegen"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd7ca95e831c18d1356da783765c344207cbdffea91e13e47fa9327dbb2e0719"
dependencies = [
"bumpalo",
"cranelift-bforest",
"cranelift-bitset",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-control",
@ -271,49 +314,51 @@ dependencies = [
"hashbrown 0.14.5",
"log",
"regalloc2",
"rustc-hash",
"smallvec",
"target-lexicon",
]
[[package]]
name = "cranelift-codegen-meta"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49bec6a517e78d4067500dc16acb558e772491a2bcb37301127448adfb8413c"
checksum = "450c105fa1e51bfba4e95a86e926504a867ad5639d63f31d43fe3b7ec1f1c9ef"
dependencies = [
"cranelift-codegen-shared",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ead4ea497b2dc2ac31fcabd6d5d0d5dc25b3964814122e343724bdf65a53c843"
checksum = "5479117cd1266881479908d383086561cee37e49affbea9b1e6b594cc21cc220"
[[package]]
name = "cranelift-control"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81e8028c8d711ea7592648e70221f2e54acb8665f7ecd49545f021ec14c3341"
checksum = "34378804f0abfdd22c068a741cfeed86938b92375b2a96fb0b42c878e0141bfb"
dependencies = [
"arbitrary",
]
[[package]]
name = "cranelift-entity"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32acd0632ba65c2566e75f64af9ef094bb8d90e58a9fbd33d920977a9d85c054"
checksum = "a48cb0a194c9ba82fec35a1e492055388d89b2e3c03dee9dcf2488892be8004d"
dependencies = [
"cranelift-bitset",
"serde",
"serde_derive",
]
[[package]]
name = "cranelift-frontend"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a395a704934aa944ba8939cac9001174b9ae5236f48bc091f89e33bb968336f6"
checksum = "8327afc6c1c05f4be62fefce5b439fa83521c65363a322e86ea32c85e7ceaf64"
dependencies = [
"cranelift-codegen",
"log",
@ -323,15 +368,15 @@ dependencies = [
[[package]]
name = "cranelift-isle"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b325ce81c4ee7082dc894537eb342c37898e14230fe7c02ea945691db3e2dd01"
checksum = "56b08621c00321efcfa3eee6a3179adc009e21ea8d24ca7adc3c326184bc3f48"
[[package]]
name = "cranelift-native"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea11f5ac85996fa093075d66397922d4f56085d5d84ec13043d0cd4f159c6818"
checksum = "d51180b147c8557c1196c77b098f04140c91962e135ea152cd2fcabf40cf365c"
dependencies = [
"cranelift-codegen",
"libc",
@ -340,9 +385,9 @@ dependencies = [
[[package]]
name = "cranelift-wasm"
version = "0.107.2"
version = "0.110.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4f175d4e299a8edabfbd64fa93c7650836cc8ad7f4879f9bd2632575a1f12d0"
checksum = "019e3dccb7f15e0bc14f0ddc034ec608a66df8e05c9e1e16f75a7716f8461799"
dependencies = [
"cranelift-codegen",
"cranelift-entity",
@ -457,6 +502,18 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "embedded-io"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
[[package]]
name = "embedded-io"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "encoding_rs"
version = "0.8.34"
@ -488,6 +545,16 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fancy-regex"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
dependencies = [
"bit-set",
"regex",
]
[[package]]
name = "fastrand"
version = "2.1.0"
@ -735,6 +802,7 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
"serde",
]
[[package]]
@ -923,6 +991,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"serial_test",
"tiktoken-rs",
]
[[package]]
@ -1002,6 +1071,12 @@ version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libredox"
version = "0.1.3"
@ -1064,15 +1139,6 @@ dependencies = [
"rustix",
]
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -1145,24 +1211,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
[[package]]
name = "object"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8dd6c0cdf9429bce006e1362bfce61fa1bfd8c898a643ed8d2b471934701d3d"
dependencies = [
"crc32fast",
"hashbrown 0.14.5",
"indexmap",
"memchr",
]
[[package]]
name = "object"
version = "0.36.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e"
dependencies = [
"crc32fast",
"hashbrown 0.14.5",
"indexmap",
"memchr",
]
@ -1306,6 +1363,18 @@ version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
[[package]]
name = "postcard"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e"
dependencies = [
"cobs",
"embedded-io 0.4.0",
"embedded-io 0.6.1",
"serde",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
@ -1358,7 +1427,7 @@ dependencies = [
[[package]]
name = "proxy-wasm-test-framework"
version = "0.1.0"
source = "git+https://github.com/katanemo/test-framework.git?branch=main#3593b817e6dff69ca6bfe32aea8a92f783a6ecd8"
source = "git+https://github.com/katanemo/test-framework.git?branch=main#c2511cd9030705e14d5f60aca77d6c96c81c6dfa"
dependencies = [
"anyhow",
"cfg-if 0.1.10",
@ -1502,6 +1571,35 @@ dependencies = [
"smallvec",
]
[[package]]
name = "regex"
version = "1.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "reqwest"
version = "0.12.5"
@ -1690,6 +1788,9 @@ name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
dependencies = [
"serde",
]
[[package]]
name = "serde"
@ -1812,6 +1913,9 @@ name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
@ -1953,6 +2057,15 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
@ -1982,6 +2095,21 @@ dependencies = [
"syn 2.0.72",
]
[[package]]
name = "tiktoken-rs"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
dependencies = [
"anyhow",
"base64 0.21.7",
"bstr",
"fancy-regex",
"lazy_static",
"parking_lot",
"rustc-hash",
]
[[package]]
name = "tinyvec"
version = "1.8.0"
@ -2320,9 +2448,9 @@ checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
[[package]]
name = "wasm-encoder"
version = "0.202.0"
version = "0.212.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfd106365a7f5f7aa3c1916a98cbb3ad477f5ff96ddb130285a91c6e7429e67a"
checksum = "501940df4418b8929eb6d52f1aade1fdd15a5b86c92453cb696e3c906bd3fc33"
dependencies = [
"leb128",
]
@ -2338,56 +2466,70 @@ dependencies = [
[[package]]
name = "wasmparser"
version = "0.202.0"
version = "0.212.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6998515d3cf3f8b980ef7c11b29a9b1017d4cf86b99ae93b546992df9931413"
checksum = "8d28bc49ba1e5c5b61ffa7a2eace10820443c4b7d1c0b144109261d14570fdf8"
dependencies = [
"ahash",
"bitflags 2.6.0",
"hashbrown 0.14.5",
"indexmap",
"semver",
"serde",
]
[[package]]
name = "wasmprinter"
version = "0.202.0"
version = "0.212.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab1cc9508685eef9502e787f4d4123745f5651a1e29aec047645d3cac1e2da7a"
checksum = "dfac65326cc561112af88c3028f6dfdb140acff67ede33a8e86be2dc6b8956f7"
dependencies = [
"anyhow",
"termcolor",
"wasmparser",
]
[[package]]
name = "wasmtime"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4af5cb32045daee8476711eb12b8b71275c2dd1fc7a58cc2a11b33ce9205f6a2"
checksum = "07232e0b473af36112da7348f51e73fa8b11047a6cb546096da3812930b7c93a"
dependencies = [
"addr2line 0.21.0",
"anyhow",
"async-trait",
"bincode",
"bitflags 2.6.0",
"bumpalo",
"cc",
"cfg-if 1.0.0",
"encoding_rs",
"fxprof-processed-profile",
"gimli 0.28.1",
"hashbrown 0.14.5",
"indexmap",
"ittapi",
"libc",
"libm",
"log",
"object 0.33.0",
"mach2",
"memfd",
"object",
"once_cell",
"paste",
"postcard",
"psm",
"rayon",
"rustix",
"semver",
"serde",
"serde_derive",
"serde_json",
"smallvec",
"sptr",
"target-lexicon",
"wasm-encoder 0.202.0",
"wasm-encoder 0.212.0",
"wasmparser",
"wasmtime-asm-macros",
"wasmtime-cache",
"wasmtime-component-macro",
"wasmtime-component-util",
@ -2396,8 +2538,8 @@ dependencies = [
"wasmtime-fiber",
"wasmtime-jit-debug",
"wasmtime-jit-icache-coherence",
"wasmtime-runtime",
"wasmtime-slab",
"wasmtime-versioned-export-macros",
"wasmtime-winch",
"wat",
"windows-sys 0.52.0",
@ -2405,24 +2547,24 @@ dependencies = [
[[package]]
name = "wasmtime-asm-macros"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7515c4d24c8b55c0feab67e3d52a42f999fda8b9cfafbd69a82ed6bcf299d26e"
checksum = "e5a9c42562d879c749288d9a26acc0d95d2ca069e30c2ec2efce84461c4d62b3"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "wasmtime-cache"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3aa2de7189ea6b3270727d0027790494aec5e7101ca50da3f9549a86628cae4"
checksum = "38d5d5aac98c8ae87cf5244495da7722e3fa022aa6f3f4fcd5e3d6e5699ce422"
dependencies = [
"anyhow",
"base64 0.21.7",
"bincode",
"directories-next",
"log",
"postcard",
"rustix",
"serde",
"serde_derive",
@ -2434,9 +2576,9 @@ dependencies = [
[[package]]
name = "wasmtime-component-macro"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "794839a710a39a12677c67ff43fec54ef00d0ca6c6f631209a7c5524522221d3"
checksum = "c0c3f57c4bc96f9b4a6ff4d6cb6e837913eff32e98d09e2b6d79b5c4647b415b"
dependencies = [
"anyhow",
"proc-macro2",
@ -2449,15 +2591,15 @@ dependencies = [
[[package]]
name = "wasmtime-component-util"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7839a1b9e15d17be1cb2a105f18be8e0bbf52bdec7a7cd6eb5d80d4c2cdf74f0"
checksum = "1da707969bc31a565da9b32d087eb2370c95c6f2087c5539a15f2e3b27e77203"
[[package]]
name = "wasmtime-cranelift"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ec2d9a4b9990bea53a5dfd689d48663dbd19a46903eaf73e2022b3d1ef20d3"
checksum = "62cb6135ec46994299be711b78b03acaa9480de3715f827d450f0c947a84977c"
dependencies = [
"anyhow",
"cfg-if 1.0.0",
@ -2469,7 +2611,7 @@ dependencies = [
"cranelift-wasm",
"gimli 0.28.1",
"log",
"object 0.33.0",
"object",
"target-lexicon",
"thiserror",
"wasmparser",
@ -2479,24 +2621,25 @@ dependencies = [
[[package]]
name = "wasmtime-environ"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad72e2e3f7ea5b50fedf66dd36ba24634e4f445c370644683b433d45d88f6126"
checksum = "9bcaa3b42a0718e9123da7fb75e8e13fc95df7db2a7e32e2f2f4f0d3333b7d6f"
dependencies = [
"anyhow",
"bincode",
"cpp_demangle",
"cranelift-bitset",
"cranelift-entity",
"gimli 0.28.1",
"indexmap",
"log",
"object 0.33.0",
"object",
"postcard",
"rustc-demangle",
"semver",
"serde",
"serde_derive",
"target-lexicon",
"thiserror",
"wasm-encoder 0.202.0",
"wasm-encoder 0.212.0",
"wasmparser",
"wasmprinter",
"wasmtime-component-util",
@ -2505,9 +2648,9 @@ dependencies = [
[[package]]
name = "wasmtime-fiber"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dbdf3053e7e7ced0cd4ed76579995b62169a1a43696890584eae2de2e33bf54"
checksum = "baf1c805515f4bc157f70f998038951009d21a19c1ef8c5fbb374a11b1d56672"
dependencies = [
"anyhow",
"cc",
@ -2520,11 +2663,11 @@ dependencies = [
[[package]]
name = "wasmtime-jit-debug"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "983ca409f2cd66385ce49486c022da0128acb7910c055beb5230998b49c6084c"
checksum = "118e141e52f3898a531a612985bd09a5e05a1d646cad2f30a3020b675c21cd49"
dependencies = [
"object 0.33.0",
"object",
"once_cell",
"rustix",
"wasmtime-versioned-export-macros",
@ -2532,69 +2675,41 @@ dependencies = [
[[package]]
name = "wasmtime-jit-icache-coherence"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede45379f3b4d395d8947006de8043801806099a240a26db553919b68e96ab15"
dependencies = [
"cfg-if 1.0.0",
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "wasmtime-runtime"
version = "20.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65019d29d175c567b84173f2adf3b7a3af6d5592f8fe510dccae55d2569ec0d2"
checksum = "2cfee42dac5148fc2664ab1f5cb8d7fa77a28d1a2cf1d9483abc2c3d751a58b9"
dependencies = [
"anyhow",
"cc",
"cfg-if 1.0.0",
"encoding_rs",
"indexmap",
"libc",
"log",
"mach2",
"memfd",
"memoffset",
"paste",
"psm",
"rustix",
"sptr",
"wasm-encoder 0.202.0",
"wasmtime-asm-macros",
"wasmtime-environ",
"wasmtime-fiber",
"wasmtime-jit-debug",
"wasmtime-slab",
"wasmtime-versioned-export-macros",
"windows-sys 0.52.0",
]
[[package]]
name = "wasmtime-slab"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6585868f5c427c3e9d2a8c0c3354e6d7d4518a0d17723ab25a0c1eebf5d5b4"
checksum = "42eb8f6515708ec67974998c3e644101db4186308985f5ef7c2ef324ff33c948"
[[package]]
name = "wasmtime-types"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84d5381ff174faded38c7b2085fbe430dff59489c87a91403354d710075750fb"
checksum = "046873fb8fb3e9652f3fd76fe99c8c8129007695c3d73b2e307fdae40f6e324c"
dependencies = [
"anyhow",
"cranelift-entity",
"serde",
"serde_derive",
"thiserror",
"smallvec",
"wasmparser",
]
[[package]]
name = "wasmtime-versioned-export-macros"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d3b70422fdfa915c903f003b8b42554a8ae1aa0c6208429d8314ebf5721f3ac"
checksum = "99c02af2e9dbeb427304d1a08787d70ed0dbfec1af2236616f84c9f1f03e7969"
dependencies = [
"proc-macro2",
"quote",
@ -2603,14 +2718,14 @@ dependencies = [
[[package]]
name = "wasmtime-winch"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "996360967b5196dec20ddcfce499ce4dc80cc925c088b0f2b376d29b96833a6a"
checksum = "b2ceddc47a49af10908a288fdfdc296ab3932062cab62a785e3705bbb3709c59"
dependencies = [
"anyhow",
"cranelift-codegen",
"gimli 0.28.1",
"object 0.33.0",
"object",
"target-lexicon",
"wasmparser",
"wasmtime-cranelift",
@ -2620,9 +2735,9 @@ dependencies = [
[[package]]
name = "wasmtime-wit-bindgen"
version = "20.0.2"
version = "23.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01840c0cfbbb01664c796e3f4edbd656e58f9d76db083c7e7c6bba59ea657a96"
checksum = "75f528f8b8a2376a3dacaf497d960216dd466d324425361e1e00e26de0a7705c"
dependencies = [
"anyhow",
"heck 0.4.1",
@ -2678,6 +2793,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
@ -2686,9 +2810,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "winch-codegen"
version = "0.18.2"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cefeb84a0f39227cf2eb665cf348e6150ebf3372d08adff03264064ab590fdf4"
checksum = "2a41b67a37ea74e83c38ef495cc213aba73385236b1deee883dc869e835003b9"
dependencies = [
"anyhow",
"cranelift-codegen",
@ -2861,9 +2985,9 @@ dependencies = [
[[package]]
name = "wit-parser"
version = "0.202.0"
version = "0.212.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "744237b488352f4f27bca05a10acb79474415951c450e52ebd0da784c1df2bcc"
checksum = "ceeb0424aa8679f3fcf2d6e3cfa381f3d6fa6179976a2c05a6249dd2bb426716"
dependencies = [
"anyhow",
"id-arena",

View file

@ -18,6 +18,7 @@ open-message-format-embeddings = { path = "../open-message-format/clients/omf-em
public-types = { path = "../public-types" }
http = "1.1.0"
governor = "0.6.3"
tiktoken-rs = "0.5.9"
[dev-dependencies]
proxy-wasm-test-framework = { git = "https://github.com/katanemo/test-framework.git", branch = "main" }

View file

@ -3,5 +3,6 @@ pub const DEFAULT_COLLECTION_NAME: &str = "prompt_vector_store";
pub const DEFAULT_NER_MODEL: &str = "urchade/gliner_large-v2.1";
pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.6;
pub const DEFAULT_NER_THRESHOLD: f64 = 0.6;
/// Name of the request header that selects the rate-limit key: its value is
/// itself treated as a header name, and that second header's key/value pair
/// becomes the rate-limit selector for the request.
pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-katanemo-ratelimit-selector";
pub const SYSTEM_ROLE: &str = "system";
pub const USER_ROLE: &str = "user";

View file

@ -1,8 +1,8 @@
use crate::consts::DEFAULT_EMBEDDING_MODEL;
use crate::ratelimit;
use crate::stats::{Gauge, RecordingMetric};
use crate::stats::{Counter, Gauge, RecordingMetric};
use crate::stream_context::StreamContext;
use log::info;
use log::{debug, info};
use md5::Digest;
use open_message_format_embeddings::models::{
CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse,
@ -15,23 +15,26 @@ use public_types::common_types::{
use public_types::configuration::{Configuration, PromptTarget};
use serde_json::to_string;
use std::collections::HashMap;
use std::rc::Rc;
use std::time::Duration;
#[derive(Copy, Clone)]
struct WasmMetrics {
active_http_calls: Gauge,
pub struct WasmMetrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
}
impl WasmMetrics {
    /// Builds the set of metric handles used by the filter.
    ///
    /// The gauge is registered under the name `active_http_calls` and the
    /// counter under the name `ratelimited_rq`, in that order.
    fn new() -> WasmMetrics {
        let active_http_calls = Gauge::new(String::from("active_http_calls"));
        let ratelimited_rq = Counter::new(String::from("ratelimited_rq"));

        WasmMetrics {
            active_http_calls,
            ratelimited_rq,
        }
    }
}
pub struct FilterContext {
metrics: WasmMetrics,
metrics: Rc<WasmMetrics>,
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: HashMap<u32, CallContext>,
config: Option<Configuration>,
@ -42,7 +45,7 @@ impl FilterContext {
FilterContext {
callouts: HashMap::new(),
config: None,
metrics: WasmMetrics::new(),
metrics: Rc::new(WasmMetrics::new()),
}
}
@ -259,6 +262,8 @@ impl RootContext for FilterContext {
if let Some(config_bytes) = self.get_plugin_configuration() {
self.config = serde_yaml::from_slice(&config_bytes).unwrap();
debug!("set configuration object: {:?}", self.config);
if let Some(ratelimits_config) = self
.config
.as_mut()
@ -273,7 +278,9 @@ impl RootContext for FilterContext {
/// Creates the per-stream context for a new HTTP stream.
///
/// The new `StreamContext` starts with no saved host header, no rate-limit
/// selector and an empty callout map, and it shares this filter's metrics
/// through a cloned `Rc`. The context id is not used.
fn create_http_context(&self, _context_id: u32) -> Option<Box<dyn HttpContext>> {
    let stream_context = StreamContext {
        host_header: None,
        ratelimit_selector: None,
        callouts: HashMap::new(),
        // Cloning the `Rc` shares the same metrics value; it does not copy it.
        metrics: Rc::clone(&self.metrics),
    };

    Some(Box::new(stream_context))
}

View file

@ -7,6 +7,7 @@ mod filter_context;
mod ratelimit;
mod stats;
mod stream_context;
mod tokenizer;
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);

View file

@ -1,4 +1,5 @@
use governor::{DefaultKeyedRateLimiter, InsufficientCapacity, Quota};
use log::debug;
use public_types::configuration;
use public_types::configuration::{Limit, Ratelimit, TimeUnit};
use std::num::{NonZero, NonZeroU32};
@ -28,9 +29,10 @@ pub struct RatelimitMap {
// This version of Header demands that the user passes a header value to match on.
#[allow(unused)]
#[derive(Debug)]
pub struct Header {
key: String,
value: String,
pub key: String,
pub value: String,
}
impl Header {
@ -84,6 +86,11 @@ impl RatelimitMap {
selector: Header,
tokens_used: NonZeroU32,
) -> Result<(), String> {
debug!(
"Checking limit for provider={}, with selector={:?}, consuming tokens={:?}",
provider, selector, tokens_used
);
let provider_limits = match self.datastore.get(&provider) {
None => {
// No limit configured for this provider, hence ok.

View file

@ -74,7 +74,10 @@ impl Metric for Gauge {
}
}
/// For state of the world updates.
/// NOTE(review): the `RecordingMetric` trait definition is not visible here;
/// presumably it lets the gauge be recorded as an absolute value — confirm.
impl RecordingMetric for Gauge {}
/// For offset deltas: gauges in this crate are adjusted by signed offsets,
/// e.g. `active_http_calls.increment(1)` and `active_http_calls.increment(-1)`.
impl IncrementingMetric for Gauge {}
#[derive(Copy, Clone)]
pub struct Histogram {

View file

@ -1,11 +1,14 @@
use crate::consts::{
DEFAULT_COLLECTION_NAME, DEFAULT_EMBEDDING_MODEL, DEFAULT_NER_MODEL, DEFAULT_NER_THRESHOLD,
DEFAULT_PROMPT_TARGET_THRESHOLD, SYSTEM_ROLE, USER_ROLE,
DEFAULT_PROMPT_TARGET_THRESHOLD, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE,
};
use crate::filter_context::WasmMetrics;
use crate::ratelimit;
use crate::ratelimit::Header;
use crate::stats::IncrementingMetric;
use crate::tokenizer;
use http::StatusCode;
use log::error;
use log::info;
use log::warn;
use log::{debug, error, info, warn};
use open_message_format_embeddings::models::{
CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse,
};
@ -17,6 +20,8 @@ use public_types::common_types::{
};
use public_types::configuration::{Entity, PromptTarget};
use std::collections::HashMap;
use std::num::NonZero;
use std::rc::Rc;
use std::time::Duration;
enum RequestType {
@ -35,7 +40,9 @@ pub struct CallContext {
/// State kept for a single HTTP stream; instances are created by
/// `create_http_context` on the filter context.
pub struct StreamContext {
// NOTE(review): presumably the request's host header saved during header
// processing — the code that sets it is not visible here; confirm.
pub host_header: Option<String>,
/// Header key/value pair used as the rate-limit selector. Populated by
/// `save_ratelimit_header`; `None` when the selector headers are absent.
pub ratelimit_selector: Option<Header>,
/// Outstanding callouts, keyed by the `token_id` of the dispatched HTTP call.
pub callouts: HashMap<u32, CallContext>,
/// Metrics handle shared with the filter context through an `Rc`.
pub metrics: Rc<WasmMetrics>,
}
impl StreamContext {
@ -65,6 +72,15 @@ impl StreamContext {
}
}
/// Captures the rate-limit selector for this request.
///
/// The header named by `RATELIMIT_SELECTOR_HEADER_KEY` carries the *name* of
/// a second request header. When both headers are present, the second
/// header's name and value are stored as the selector; otherwise the selector
/// is set to `None`.
fn save_ratelimit_header(&mut self) {
    // Without the selector header there is nothing to look up.
    let Some(key) = self.get_http_request_header(RATELIMIT_SELECTOR_HEADER_KEY) else {
        self.ratelimit_selector = None;
        return;
    };

    // The selector header's value is the name of the header to match on.
    let selected_value = self.get_http_request_header(&key);
    self.ratelimit_selector = selected_value.map(|value| Header { key, value });
}
fn embeddings_handler(&mut self, body: Vec<u8>, mut callout_context: CallContext) {
let embedding_response: CreateEmbeddingResponse = match serde_json::from_slice(&body) {
Ok(embedding_response) => embedding_response,
@ -115,6 +131,7 @@ impl StreamContext {
if self.callouts.insert(token_id, callout_context).is_some() {
panic!("duplicate token_id")
}
self.metrics.active_http_calls.increment(1);
}
fn search_points_handler(&mut self, body: Vec<u8>, mut callout_context: CallContext) {
@ -202,6 +219,7 @@ impl StreamContext {
if self.callouts.insert(token_id, callout_context).is_some() {
panic!("duplicate token_id")
}
self.metrics.active_http_calls.increment(1);
}
fn ner_handler(&mut self, body: Vec<u8>, mut callout_context: CallContext) {
@ -290,10 +308,11 @@ impl StreamContext {
if self.callouts.insert(token_id, callout_context).is_some() {
panic!("duplicate token_id")
}
self.metrics.active_http_calls.increment(1);
}
fn context_resolver_handler(&mut self, body: Vec<u8>, callout_context: CallContext) {
info!("response received for context_resolver");
debug!("response received for context_resolver");
let body_string = String::from_utf8(body);
let prompt_target = callout_context.prompt_target.unwrap();
let mut request_body = callout_context.request_body;
@ -331,7 +350,30 @@ impl StreamContext {
return;
}
};
info!("sending request to openai: msg {}", json_string);
// Tokenize and Ratelimit.
if let Some(selector) = self.ratelimit_selector.take() {
if let Ok(token_count) = tokenizer::token_count(&request_body.model, &json_string) {
match ratelimit::ratelimits(None).read().unwrap().check_limit(
request_body.model,
selector,
NonZero::new(token_count as u32).unwrap(),
) {
Ok(_) => (),
Err(err) => {
self.send_http_response(
StatusCode::TOO_MANY_REQUESTS.as_u16().into(),
vec![],
Some(format!("Exceeded Ratelimit: {}", err).as_bytes()),
);
self.metrics.ratelimited_rq.increment(1);
return;
}
}
}
}
debug!("sending request to openai: msg {}", json_string);
self.set_http_request_body(0, json_string.len(), &json_string.into_bytes());
self.resume_http_request();
}
@ -345,6 +387,7 @@ impl HttpContext for StreamContext {
self.save_host_header();
self.delete_content_length_header();
self.modify_path_header();
self.save_ratelimit_header();
Action::Continue
}
@ -450,6 +493,7 @@ impl HttpContext for StreamContext {
token_id
)
}
self.metrics.active_http_calls.increment(1);
Action::Pause
}
@ -464,6 +508,7 @@ impl Context for StreamContext {
_num_trailers: usize,
) {
let callout_context = self.callouts.remove(&token_id).expect("invalid token_id");
self.metrics.active_http_calls.increment(-1);
let resp = self.get_http_call_response_body(0, body_size);

View file

@ -0,0 +1,39 @@
use log::debug;
/// Errors reported by the tokenizer helpers in this module.
#[derive(Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub enum Error {
/// `tiktoken_rs::get_bpe_from_model` did not accept the given model name.
UnknownModel,
// NOTE(review): not constructed anywhere in the visible code — presumably
// reserved for encoding failures; confirm before relying on it.
FailedToTokenize,
}
#[allow(dead_code)]
pub fn token_count(model_name: &str, text: &str) -> Result<usize, Error> {
debug!("getting token count model={}", model_name);
// Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?
let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel)?;
Ok(bpe.encode_ordinary(text).len())
}
#[cfg(test)]
mod test {
    use super::*;

    /// A known model tokenizes the sample sentence into the expected count.
    #[test]
    fn encode_ordinary() {
        let counted = token_count("gpt-3.5-turbo", "How many tokens does this sentence have?");
        assert_eq!(8, counted.expect("correct tokenization"));
    }

    /// A model name the tokenizer library does not know maps to `UnknownModel`.
    #[test]
    fn unrecognized_model() {
        let outcome = token_count("unknown", "");
        assert_eq!(Error::UnknownModel, outcome.expect_err("unknown model"))
    }
}

View file

@ -1,8 +1,20 @@
use http::StatusCode;
use proxy_wasm_test_framework::tester;
use proxy_wasm_test_framework::types::{Action, BufferType, MapType, MetricType, ReturnType};
use public_types::common_types::Entity;
use open_message_format_embeddings::models::{
create_embedding_response::{self, CreateEmbeddingResponse},
create_embedding_response_usage::CreateEmbeddingResponseUsage,
embedding, Embedding,
};
use proxy_wasm_test_framework::tester::{self, Tester};
use proxy_wasm_test_framework::types::{
Action, BufferType, LogLevel, MapType, MetricType, ReturnType,
};
use public_types::configuration::{self, Endpoint, PromptTarget};
use public_types::{
common_types::{self, NERResponse, SearchPointResult, SearchPointsResponse},
configuration::Configuration,
};
use serial_test::serial;
use std::collections::HashMap;
use std::path::Path;
fn wasm_module() -> String {
@ -14,6 +26,223 @@ fn wasm_module() -> String {
wasm_file.to_str().unwrap().to_string()
}
/// Drives one HTTP stream through the filter up to its final outbound call,
/// asserting every mocked host interaction along the way.
///
/// `module` is the mocked proxy-wasm host, `filter_context` is the id passed as
/// the parent of the new stream context, and `http_context` is the id the stream
/// context is created under.
///
/// Steps, in order: context creation, request headers (including the rate-limit
/// selector lookup), request body, then mocked responses for the
/// `embeddingserver` (token 1), `qdrant` (token 2) and `nerhost` (token 3)
/// callouts. On return, the call to the prompt target's cluster (token 4) has
/// been dispatched but not answered; the caller supplies that response.
///
/// Panics if any `execute_and_expect` step returns an error.
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
// Create the per-stream HTTP context as a child of the filter context.
module
.call_proxy_on_context_create(http_context, filter_context)
.execute_and_expect(ReturnType::None)
.unwrap();
// Request Headers
// The mocked host answers each header lookup. The selector header names
// "selector-key", and that header is then read in turn ("selector-value").
module
.call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host"))
.returning(Some("api.openai.com"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("content-length"),
Some(""),
)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/llmrouting"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(Some("selector-key"))
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key"))
.returning(Some("selector-value"))
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
// Request Body
// A chat-completions payload with a system and a user message for model "gpt-4".
let chat_completions_request_body = "\
{\
\"messages\": [\
{\
\"role\": \"system\",\
\"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
},\
{\
\"role\": \"user\",\
\"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
}\
],\
\"model\": \"gpt-4\"\
}";
// The body is expected to pause the stream while the first callout is in flight.
module
.call_proxy_on_request_body(
http_context,
chat_completions_request_body.len() as i32,
true,
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_http_call(Some("embeddingserver"), None, None, None, None)
.returning(Some(1))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::Action(Action::Pause))
.unwrap();
// Minimal embeddings response for callout 1: a single entry with an empty vector.
let embedding_response = CreateEmbeddingResponse {
data: vec![Embedding {
index: 0,
embedding: vec![],
object: embedding::Object::default(),
}],
model: String::from("test"),
object: create_embedding_response::Object::default(),
usage: Box::new(CreateEmbeddingResponseUsage::new(0, 0)),
};
let embeddings_response_buffer = serde_json::to_string(&embedding_response).unwrap();
// Delivering it should decrement the gauge for callout 1 and dispatch the
// "qdrant" callout, mocked as token 2.
module
.call_proxy_on_http_call_response(
http_context,
1,
0,
embeddings_response_buffer.len() as i32,
0,
)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&embeddings_response_buffer))
.expect_http_call(Some("qdrant"), None, None, None, None)
.returning(Some(2))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::None)
.unwrap();
// Prompt target carried in the search result payload; it declares one required
// entity, "test-entity", and an endpoint cluster.
let prompt_target = PromptTarget {
name: String::from("test-prompt-target"),
prompt_type: String::from("test-prompt-type"),
few_shot_examples: vec![],
entities: Some(vec![configuration::Entity {
name: String::from("test-entity"),
required: Some(true),
description: None,
}]),
endpoint: Some(Endpoint {
cluster: String::from("test-endpoint-cluster"),
path: None,
method: None,
}),
system_prompt: None,
};
let prompt_target_str = serde_json::to_string(&prompt_target).unwrap();
// Search response with a single hit (score 0.7) whose "prompt-target" payload
// entry is the serialized prompt target above.
let search_points_response = SearchPointsResponse {
status: String::new(),
time: 0.0,
result: vec![SearchPointResult {
id: String::new(),
version: 0,
score: 0.7,
payload: HashMap::from([(String::from("prompt-target"), prompt_target_str)]),
}],
};
let search_points_response_buffer = serde_json::to_string(&search_points_response).unwrap();
// Delivering the search response should emit two info logs and dispatch the
// "nerhost" callout, mocked as token 3.
module
.call_proxy_on_http_call_response(
http_context,
2,
0,
search_points_response_buffer.len() as i32,
0,
)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&search_points_response_buffer))
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Info), None)
.expect_http_call(Some("nerhost"), None, None, None, None)
.returning(Some(3))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::None)
.unwrap();
// NER response with one entity labelled "test-entity", the same name as the
// prompt target's required entity.
let ner_reponse = NERResponse {
model: String::from("test-model"),
data: vec![common_types::Entity {
score: 0.7,
text: String::from("test-text"),
label: String::from("test-entity"),
}],
};
let ner_response_buffer = serde_json::to_string(&ner_reponse).unwrap();
// Moves the cluster name out of `prompt_target` and leaks it to obtain a plain
// string reference for the expectation below.
// NOTE(review): presumably done to satisfy a lifetime requirement of the tester
// API — confirm; the leaked allocation is never freed.
let upstream_name = prompt_target.endpoint.unwrap().cluster.leak();
// Delivering the NER response should log once and dispatch a call to the prompt
// target's cluster, mocked as token 4. That call is left pending for the caller.
module
.call_proxy_on_http_call_response(http_context, 3, 0, ner_response_buffer.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&ner_response_buffer))
.expect_log(Some(LogLevel::Info), None)
.expect_http_call(Some(upstream_name), None, None, None, None)
.returning(Some(4))
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::None)
.unwrap()
}
/// Builds the baseline plugin `Configuration` for the rate-limit tests by
/// deserializing an inline YAML document.
///
/// The document declares one LLM provider (`open-ai-gpt-4`, model `gpt-4`), two
/// `context_resolver` prompt targets, and a single rate limit for provider
/// `gpt-4` selected by header `selector-key` = `selector-value` with a limit of
/// 1 token per minute. Tests adjust the returned value when they need a
/// different limit.
///
/// Panics if the YAML does not deserialize into `Configuration`.
fn default_config() -> Configuration {
// The YAML lives in a raw string literal: its content is data, not comments,
// so any edit inside it changes the parsed configuration.
let config: &str = r#"
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
embedding_provider:
name: "SentenceTransformer"
model: "all-MiniLM-L6-v2"
llm_providers:
- name: "open-ai-gpt-4"
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
system_prompt: |
You are a helpful weather forecaster. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed
prompt_targets:
- type: context_resolver
name: weather_forecast
few_shot_examples:
- what is the weather in New York?
endpoint:
cluster: weatherhost
path: /weather
entities:
- name: location
required: true
description: "The location for which the weather is requested"
- type: context_resolver
name: weather_forecast_2
few_shot_examples:
- what is the weather in New York?
endpoint:
cluster: weatherhost
path: /weather
entities:
- name: city
ratelimits:
- provider: gpt-4
selector:
key: selector-key
value: selector-value
limit:
tokens: 1
unit: minute
"#;
// A malformed document is a bug in the test fixture, so failing fast is intended.
serde_yaml::from_str(config).unwrap()
}
#[test]
#[serial]
fn successful_request_to_open_ai_chat_completions() {
@ -35,6 +264,7 @@ fn successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(root_context, 0)
.expect_metric_creation(MetricType::Gauge, "active_http_calls")
.expect_metric_creation(MetricType::Counter, "ratelimited_rq")
.execute_and_expect(ReturnType::None)
.unwrap();
@ -63,6 +293,11 @@ fn successful_request_to_open_ai_chat_completions() {
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -78,7 +313,8 @@ fn successful_request_to_open_ai_chat_completions() {
\"role\": \"user\",\
\"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
}\
]\
],\
\"model\": \"gpt-4\"\
}";
module
@ -91,6 +327,7 @@ fn successful_request_to_open_ai_chat_completions() {
.returning(Some(chat_completions_request_body))
// TODO: assert that the model field was added.
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::Action(Action::Pause))
.unwrap();
}
@ -116,6 +353,7 @@ fn bad_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(root_context, 0)
.expect_metric_creation(MetricType::Gauge, "active_http_calls")
.expect_metric_creation(MetricType::Counter, "ratelimited_rq")
.execute_and_expect(ReturnType::None)
.unwrap();
@ -144,6 +382,11 @@ fn bad_request_to_open_ai_chat_completions() {
Some(":path"),
Some("/v1/chat/completions"),
)
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-katanemo-ratelimit-selector"),
)
.returning(None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -181,15 +424,7 @@ fn bad_request_to_open_ai_chat_completions() {
#[test]
#[serial]
fn delete_me_in_next_pr_successful_request_to_open_ai_chat_completions() {
let ner_response = Entity {
score: 0.7,
text: String::from("hello"),
label: String::from("hello"),
};
let ner_response_buffer = serde_json::to_string(&ner_response).unwrap();
println!("{} is my length", ner_response_buffer.len());
fn request_ratelimited() {
let args = tester::MockSettings {
wasm_path: wasm_module(),
quiet: false,
@ -203,67 +438,102 @@ fn delete_me_in_next_pr_successful_request_to_open_ai_chat_completions() {
.unwrap();
// Setup Filter
let root_context = 1;
let filter_context = 1;
let config = serde_json::to_string(&default_config()).unwrap();
module
.call_proxy_on_context_create(root_context, 0)
.call_proxy_on_context_create(filter_context, 0)
.expect_metric_creation(MetricType::Gauge, "active_http_calls")
.expect_metric_creation(MetricType::Counter, "ratelimited_rq")
.execute_and_expect(ReturnType::None)
.unwrap();
module
.call_proxy_on_configure(filter_context, config.len() as i32)
.expect_log(Some(LogLevel::Debug), None)
.expect_get_buffer_bytes(Some(BufferType::PluginConfiguration))
.returning(Some(&config))
.execute_and_expect(ReturnType::Bool(true))
.unwrap();
// Setup HTTP Stream
let http_context = 2;
normal_flow(&mut module, filter_context, http_context);
let test_body = "test body";
module
.call_proxy_on_context_create(http_context, root_context)
.call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(test_body))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_send_local_response(
Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
None,
None,
None,
)
.expect_metric_increment("ratelimited_rq", 1)
.execute_and_expect(ReturnType::None)
.unwrap();
}
#[test]
#[serial]
fn request_not_ratelimited() {
let args = tester::MockSettings {
wasm_path: wasm_module(),
quiet: false,
allow_unexpected: false,
};
let mut module = tester::mock(args).unwrap();
module
.call_start()
.execute_and_expect(ReturnType::None)
.unwrap();
// Request Headers
// Setup Filter
let filter_context = 1;
let mut config = default_config();
config.ratelimits.as_mut().unwrap()[0].limit.tokens += 1000;
let config_str = serde_json::to_string(&config).unwrap();
module
.call_proxy_on_request_headers(http_context, 0, false)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host"))
.returning(Some("api.openai.com"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("content-length"),
Some(""),
)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/llmrouting"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some(":path"),
Some("/v1/chat/completions"),
)
.execute_and_expect(ReturnType::Action(Action::Continue))
.call_proxy_on_context_create(filter_context, 0)
.expect_metric_creation(MetricType::Gauge, "active_http_calls")
.expect_metric_creation(MetricType::Counter, "ratelimited_rq")
.execute_and_expect(ReturnType::None)
.unwrap();
module
.call_proxy_on_configure(filter_context, config_str.len() as i32)
.expect_log(Some(LogLevel::Debug), None)
.expect_get_buffer_bytes(Some(BufferType::PluginConfiguration))
.returning(Some(&config_str))
.execute_and_expect(ReturnType::Bool(true))
.unwrap();
// Request Body
let chat_completions_request_body = "\
{\
\"messages\": [\
{\
\"role\": \"system\",\
\"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\
},\
{\
\"role\": \"user\",\
\"content\": \"Compose a poem that explains the concept of recursion in programming.\"\
}\
]\
}";
// Setup HTTP Stream
let http_context = 2;
normal_flow(&mut module, filter_context, http_context);
let test_body = "test body";
module
.call_proxy_on_request_body(
http_context,
chat_completions_request_body.len() as i32,
true,
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// TODO: assert that the model field was added.
.call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(test_body))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Pause))
.execute_and_expect(ReturnType::None)
.unwrap();
}