diff --git a/envoyfilter/Cargo.lock b/envoyfilter/Cargo.lock index d99905e9..4f68ce6a 100644 --- a/envoyfilter/Cargo.lock +++ b/envoyfilter/Cargo.lock @@ -38,6 +38,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.18" @@ -110,7 +119,7 @@ dependencies = [ "cfg-if 1.0.0", "libc", "miniz_oxide", - "object 0.36.2", + "object", "rustc-demangle", ] @@ -127,14 +136,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "bincode" -version = "1.3.3" +name = "bit-set" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "serde", + "bit-vec", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -156,6 +171,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -211,6 +237,12 @@ dependencies = [ "vec_map", ] +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "core-foundation" version = "0.9.4" @@ -247,21 +279,32 @@ dependencies = [ [[package]] name = "cranelift-bforest" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebf72ceaf38f7d41194d0cf6748214d8ef7389167fe09aad80f87646dbfa325b" +checksum = "305d51c180ebdc46ef61bc60c54ae6512db3bc9a05842a1f1e762e45977019ab" dependencies = [ "cranelift-entity", ] [[package]] -name = "cranelift-codegen" -version = "0.107.2" +name = "cranelift-bitset" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ee7fde5cd9173f00ce02c491ee9e306d64740f4b1a697946e0474f389999e13" +checksum = "e3247afacd9b13d620033f3190d9e49d1beefc1acb33d5604a249956c9c13709" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-codegen" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd7ca95e831c18d1356da783765c344207cbdffea91e13e47fa9327dbb2e0719" dependencies = [ "bumpalo", "cranelift-bforest", + "cranelift-bitset", "cranelift-codegen-meta", "cranelift-codegen-shared", "cranelift-control", @@ -271,49 +314,51 @@ dependencies = [ "hashbrown 0.14.5", "log", "regalloc2", + "rustc-hash", "smallvec", "target-lexicon", ] [[package]] name = "cranelift-codegen-meta" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49bec6a517e78d4067500dc16acb558e772491a2bcb37301127448adfb8413c" +checksum = "450c105fa1e51bfba4e95a86e926504a867ad5639d63f31d43fe3b7ec1f1c9ef" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ead4ea497b2dc2ac31fcabd6d5d0d5dc25b3964814122e343724bdf65a53c843" +checksum = "5479117cd1266881479908d383086561cee37e49affbea9b1e6b594cc21cc220" [[package]] name = "cranelift-control" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81e8028c8d711ea7592648e70221f2e54acb8665f7ecd49545f021ec14c3341" +checksum = "34378804f0abfdd22c068a741cfeed86938b92375b2a96fb0b42c878e0141bfb" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32acd0632ba65c2566e75f64af9ef094bb8d90e58a9fbd33d920977a9d85c054" +checksum = "a48cb0a194c9ba82fec35a1e492055388d89b2e3c03dee9dcf2488892be8004d" dependencies = [ + "cranelift-bitset", "serde", "serde_derive", ] [[package]] name = "cranelift-frontend" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a395a704934aa944ba8939cac9001174b9ae5236f48bc091f89e33bb968336f6" +checksum = "8327afc6c1c05f4be62fefce5b439fa83521c65363a322e86ea32c85e7ceaf64" dependencies = [ "cranelift-codegen", "log", @@ -323,15 +368,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b325ce81c4ee7082dc894537eb342c37898e14230fe7c02ea945691db3e2dd01" +checksum = "56b08621c00321efcfa3eee6a3179adc009e21ea8d24ca7adc3c326184bc3f48" [[package]] name = "cranelift-native" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea11f5ac85996fa093075d66397922d4f56085d5d84ec13043d0cd4f159c6818" +checksum = "d51180b147c8557c1196c77b098f04140c91962e135ea152cd2fcabf40cf365c" dependencies = [ "cranelift-codegen", "libc", @@ -340,9 +385,9 @@ dependencies = [ [[package]] name = "cranelift-wasm" -version = "0.107.2" +version = "0.110.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4f175d4e299a8edabfbd64fa93c7650836cc8ad7f4879f9bd2632575a1f12d0" +checksum = "019e3dccb7f15e0bc14f0ddc034ec608a66df8e05c9e1e16f75a7716f8461799" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -457,6 +502,18 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -488,6 +545,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" +[[package]] +name = "fancy-regex" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fastrand" version = "2.1.0" @@ -735,6 +802,7 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", + "serde", ] [[package]] @@ -923,6 +991,7 @@ dependencies = [ "serde_json", "serde_yaml", "serial_test", + "tiktoken-rs", ] [[package]] @@ -1002,6 +1071,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -1064,15 +1139,6 @@ dependencies = [ "rustix", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -1145,24 +1211,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" -[[package]] -name = "object" -version = "0.33.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8dd6c0cdf9429bce006e1362bfce61fa1bfd8c898a643ed8d2b471934701d3d" -dependencies = [ - "crc32fast", - "hashbrown 0.14.5", - "indexmap", - "memchr", -] - [[package]] name = "object" version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ + "crc32fast", + "hashbrown 0.14.5", + "indexmap", "memchr", ] @@ -1306,6 +1363,18 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" +[[package]] +name = "postcard" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1358,7 +1427,7 @@ dependencies = [ [[package]] name = "proxy-wasm-test-framework" version = "0.1.0" -source = "git+https://github.com/katanemo/test-framework.git?branch=main#3593b817e6dff69ca6bfe32aea8a92f783a6ecd8" +source = "git+https://github.com/katanemo/test-framework.git?branch=main#c2511cd9030705e14d5f60aca77d6c96c81c6dfa" dependencies = [ "anyhow", "cfg-if 0.1.10", @@ -1502,6 +1571,35 @@ dependencies = [ "smallvec", ] +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "reqwest" version = "0.12.5" @@ -1690,6 +1788,9 @@ name = "semver" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +dependencies = [ + "serde", +] [[package]] name = "serde" @@ -1812,6 +1913,9 @@ name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +dependencies = [ + "serde", +] [[package]] name = "socket2" @@ -1953,6 +2057,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -1982,6 +2095,21 @@ dependencies = [ "syn 2.0.72", ] +[[package]] +name = "tiktoken-rs" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +dependencies = [ + "anyhow", + "base64 0.21.7", + "bstr", + "fancy-regex", + "lazy_static", + "parking_lot", + "rustc-hash", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -2320,9 +2448,9 @@ checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "wasm-encoder" -version = "0.202.0" +version = "0.212.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfd106365a7f5f7aa3c1916a98cbb3ad477f5ff96ddb130285a91c6e7429e67a" +checksum = "501940df4418b8929eb6d52f1aade1fdd15a5b86c92453cb696e3c906bd3fc33" dependencies = [ "leb128", ] @@ -2338,56 +2466,70 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.202.0" +version = "0.212.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6998515d3cf3f8b980ef7c11b29a9b1017d4cf86b99ae93b546992df9931413" +checksum = "8d28bc49ba1e5c5b61ffa7a2eace10820443c4b7d1c0b144109261d14570fdf8" dependencies = [ + "ahash", "bitflags 2.6.0", + "hashbrown 0.14.5", "indexmap", "semver", + "serde", ] [[package]] name = "wasmprinter" -version = "0.202.0" +version = "0.212.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab1cc9508685eef9502e787f4d4123745f5651a1e29aec047645d3cac1e2da7a" +checksum = "dfac65326cc561112af88c3028f6dfdb140acff67ede33a8e86be2dc6b8956f7" dependencies = [ "anyhow", + "termcolor", "wasmparser", ] [[package]] name = "wasmtime" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4af5cb32045daee8476711eb12b8b71275c2dd1fc7a58cc2a11b33ce9205f6a2" +checksum = "07232e0b473af36112da7348f51e73fa8b11047a6cb546096da3812930b7c93a" dependencies = [ "addr2line 0.21.0", "anyhow", "async-trait", - "bincode", + "bitflags 2.6.0", "bumpalo", + "cc", "cfg-if 1.0.0", "encoding_rs", "fxprof-processed-profile", "gimli 0.28.1", + "hashbrown 0.14.5", "indexmap", "ittapi", "libc", + "libm", "log", - "object 0.33.0", + "mach2", + "memfd", + "object", "once_cell", "paste", + "postcard", + "psm", "rayon", "rustix", "semver", "serde", "serde_derive", "serde_json", + "smallvec", + "sptr", "target-lexicon", - "wasm-encoder 0.202.0", + "wasm-encoder 0.212.0", "wasmparser", + "wasmtime-asm-macros", "wasmtime-cache", "wasmtime-component-macro", "wasmtime-component-util", @@ -2396,8 +2538,8 @@ dependencies = [ "wasmtime-fiber", "wasmtime-jit-debug", "wasmtime-jit-icache-coherence", - "wasmtime-runtime", "wasmtime-slab", + "wasmtime-versioned-export-macros", "wasmtime-winch", "wat", "windows-sys 0.52.0", @@ -2405,24 +2547,24 @@ dependencies = [ [[package]] name = "wasmtime-asm-macros" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7515c4d24c8b55c0feab67e3d52a42f999fda8b9cfafbd69a82ed6bcf299d26e" +checksum = "e5a9c42562d879c749288d9a26acc0d95d2ca069e30c2ec2efce84461c4d62b3" dependencies = [ "cfg-if 1.0.0", ] [[package]] name = "wasmtime-cache" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3aa2de7189ea6b3270727d0027790494aec5e7101ca50da3f9549a86628cae4" +checksum = "38d5d5aac98c8ae87cf5244495da7722e3fa022aa6f3f4fcd5e3d6e5699ce422" dependencies = [ "anyhow", "base64 0.21.7", - "bincode", "directories-next", "log", + "postcard", "rustix", "serde", "serde_derive", @@ -2434,9 +2576,9 @@ dependencies = [ [[package]] name = "wasmtime-component-macro" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794839a710a39a12677c67ff43fec54ef00d0ca6c6f631209a7c5524522221d3" +checksum = "c0c3f57c4bc96f9b4a6ff4d6cb6e837913eff32e98d09e2b6d79b5c4647b415b" dependencies = [ "anyhow", "proc-macro2", @@ -2449,15 +2591,15 @@ dependencies = [ [[package]] name = "wasmtime-component-util" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7839a1b9e15d17be1cb2a105f18be8e0bbf52bdec7a7cd6eb5d80d4c2cdf74f0" +checksum = "1da707969bc31a565da9b32d087eb2370c95c6f2087c5539a15f2e3b27e77203" [[package]] name = "wasmtime-cranelift" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ec2d9a4b9990bea53a5dfd689d48663dbd19a46903eaf73e2022b3d1ef20d3" +checksum = "62cb6135ec46994299be711b78b03acaa9480de3715f827d450f0c947a84977c" dependencies = [ "anyhow", "cfg-if 1.0.0", @@ -2469,7 +2611,7 @@ dependencies = [ "cranelift-wasm", "gimli 0.28.1", "log", - "object 0.33.0", + "object", "target-lexicon", "thiserror", "wasmparser", @@ -2479,24 +2621,25 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad72e2e3f7ea5b50fedf66dd36ba24634e4f445c370644683b433d45d88f6126" +checksum = "9bcaa3b42a0718e9123da7fb75e8e13fc95df7db2a7e32e2f2f4f0d3333b7d6f" dependencies = [ "anyhow", - "bincode", "cpp_demangle", + "cranelift-bitset", "cranelift-entity", "gimli 0.28.1", "indexmap", "log", - "object 0.33.0", + "object", + "postcard", "rustc-demangle", + "semver", "serde", "serde_derive", "target-lexicon", - "thiserror", - "wasm-encoder 0.202.0", + "wasm-encoder 0.212.0", "wasmparser", "wasmprinter", "wasmtime-component-util", @@ -2505,9 +2648,9 @@ dependencies = [ [[package]] name = "wasmtime-fiber" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dbdf3053e7e7ced0cd4ed76579995b62169a1a43696890584eae2de2e33bf54" +checksum = "baf1c805515f4bc157f70f998038951009d21a19c1ef8c5fbb374a11b1d56672" dependencies = [ "anyhow", "cc", @@ -2520,11 +2663,11 @@ dependencies = [ [[package]] name = "wasmtime-jit-debug" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "983ca409f2cd66385ce49486c022da0128acb7910c055beb5230998b49c6084c" +checksum = "118e141e52f3898a531a612985bd09a5e05a1d646cad2f30a3020b675c21cd49" dependencies = [ - "object 0.33.0", + "object", "once_cell", "rustix", "wasmtime-versioned-export-macros", @@ -2532,69 +2675,41 @@ dependencies = [ [[package]] name = "wasmtime-jit-icache-coherence" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede45379f3b4d395d8947006de8043801806099a240a26db553919b68e96ab15" -dependencies = [ - "cfg-if 1.0.0", - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "wasmtime-runtime" -version = "20.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65019d29d175c567b84173f2adf3b7a3af6d5592f8fe510dccae55d2569ec0d2" +checksum = "2cfee42dac5148fc2664ab1f5cb8d7fa77a28d1a2cf1d9483abc2c3d751a58b9" dependencies = [ "anyhow", - "cc", "cfg-if 1.0.0", - "encoding_rs", - "indexmap", "libc", - "log", - "mach2", - "memfd", - "memoffset", - "paste", - "psm", - "rustix", - "sptr", - "wasm-encoder 0.202.0", - "wasmtime-asm-macros", - "wasmtime-environ", - "wasmtime-fiber", - "wasmtime-jit-debug", - "wasmtime-slab", - "wasmtime-versioned-export-macros", "windows-sys 0.52.0", ] [[package]] name = "wasmtime-slab" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6585868f5c427c3e9d2a8c0c3354e6d7d4518a0d17723ab25a0c1eebf5d5b4" +checksum = "42eb8f6515708ec67974998c3e644101db4186308985f5ef7c2ef324ff33c948" [[package]] name = "wasmtime-types" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d5381ff174faded38c7b2085fbe430dff59489c87a91403354d710075750fb" +checksum = "046873fb8fb3e9652f3fd76fe99c8c8129007695c3d73b2e307fdae40f6e324c" dependencies = [ + "anyhow", "cranelift-entity", "serde", "serde_derive", - "thiserror", + "smallvec", "wasmparser", ] [[package]] name = "wasmtime-versioned-export-macros" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d3b70422fdfa915c903f003b8b42554a8ae1aa0c6208429d8314ebf5721f3ac" +checksum = "99c02af2e9dbeb427304d1a08787d70ed0dbfec1af2236616f84c9f1f03e7969" dependencies = [ "proc-macro2", "quote", @@ -2603,14 +2718,14 @@ dependencies = [ [[package]] name = "wasmtime-winch" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "996360967b5196dec20ddcfce499ce4dc80cc925c088b0f2b376d29b96833a6a" +checksum = "b2ceddc47a49af10908a288fdfdc296ab3932062cab62a785e3705bbb3709c59" dependencies = [ "anyhow", "cranelift-codegen", "gimli 0.28.1", - "object 0.33.0", + "object", "target-lexicon", "wasmparser", "wasmtime-cranelift", @@ -2620,9 +2735,9 @@ dependencies = [ [[package]] name = "wasmtime-wit-bindgen" -version = "20.0.2" +version = "23.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01840c0cfbbb01664c796e3f4edbd656e58f9d76db083c7e7c6bba59ea657a96" +checksum = "75f528f8b8a2376a3dacaf497d960216dd466d324425361e1e00e26de0a7705c" dependencies = [ "anyhow", "heck 0.4.1", @@ -2678,6 +2793,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2686,9 +2810,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "winch-codegen" -version = "0.18.2" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefeb84a0f39227cf2eb665cf348e6150ebf3372d08adff03264064ab590fdf4" +checksum = "2a41b67a37ea74e83c38ef495cc213aba73385236b1deee883dc869e835003b9" dependencies = [ "anyhow", "cranelift-codegen", @@ -2861,9 +2985,9 @@ dependencies = [ [[package]] name = "wit-parser" -version = "0.202.0" +version = "0.212.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744237b488352f4f27bca05a10acb79474415951c450e52ebd0da784c1df2bcc" +checksum = "ceeb0424aa8679f3fcf2d6e3cfa381f3d6fa6179976a2c05a6249dd2bb426716" dependencies = [ "anyhow", "id-arena", diff --git a/envoyfilter/Cargo.toml b/envoyfilter/Cargo.toml index 911a893d..188e5f1f 100644 --- a/envoyfilter/Cargo.toml +++ b/envoyfilter/Cargo.toml @@ -18,6 +18,7 @@ open-message-format-embeddings = { path = "../open-message-format/clients/omf-em public-types = { path = "../public-types" } http = "1.1.0" governor = "0.6.3" +tiktoken-rs = "0.5.9" [dev-dependencies] proxy-wasm-test-framework = { git = "https://github.com/katanemo/test-framework.git", branch = "main" } diff --git a/envoyfilter/src/consts.rs b/envoyfilter/src/consts.rs index 0f844023..b2fa445e 100644 --- a/envoyfilter/src/consts.rs +++ b/envoyfilter/src/consts.rs @@ -3,5 +3,6 @@ pub const DEFAULT_COLLECTION_NAME: &str = "prompt_vector_store"; pub const DEFAULT_NER_MODEL: &str = "urchade/gliner_large-v2.1"; pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.6; pub const DEFAULT_NER_THRESHOLD: f64 = 0.6; +pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-katanemo-ratelimit-selector"; pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; diff --git a/envoyfilter/src/filter_context.rs b/envoyfilter/src/filter_context.rs index bd3e927a..7ceafff3 100644 --- a/envoyfilter/src/filter_context.rs +++ b/envoyfilter/src/filter_context.rs @@ -1,8 +1,8 @@ use crate::consts::DEFAULT_EMBEDDING_MODEL; use crate::ratelimit; -use crate::stats::{Gauge, RecordingMetric}; +use crate::stats::{Counter, Gauge, RecordingMetric}; use crate::stream_context::StreamContext; -use log::info; +use log::{debug, info}; use md5::Digest; use open_message_format_embeddings::models::{ CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, @@ -15,23 +15,26 @@ use public_types::common_types::{ use public_types::configuration::{Configuration, PromptTarget}; use serde_json::to_string; use std::collections::HashMap; +use std::rc::Rc; use std::time::Duration; #[derive(Copy, Clone)] -struct WasmMetrics { - active_http_calls: Gauge, +pub struct WasmMetrics { + pub active_http_calls: Gauge, + pub ratelimited_rq: Counter, } impl WasmMetrics { fn new() -> WasmMetrics { WasmMetrics { active_http_calls: Gauge::new(String::from("active_http_calls")), + ratelimited_rq: Counter::new(String::from("ratelimited_rq")), } } } pub struct FilterContext { - metrics: WasmMetrics, + metrics: Rc, // callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request. callouts: HashMap, config: Option, @@ -42,7 +45,7 @@ impl FilterContext { FilterContext { callouts: HashMap::new(), config: None, - metrics: WasmMetrics::new(), + metrics: Rc::new(WasmMetrics::new()), } } @@ -259,6 +262,8 @@ impl RootContext for FilterContext { if let Some(config_bytes) = self.get_plugin_configuration() { self.config = serde_yaml::from_slice(&config_bytes).unwrap(); + debug!("set configuration object: {:?}", self.config); + if let Some(ratelimits_config) = self .config .as_mut() @@ -273,7 +278,9 @@ impl RootContext for FilterContext { fn create_http_context(&self, _context_id: u32) -> Option> { Some(Box::new(StreamContext { host_header: None, + ratelimit_selector: None, callouts: HashMap::new(), + metrics: Rc::clone(&self.metrics), })) } diff --git a/envoyfilter/src/lib.rs b/envoyfilter/src/lib.rs index cd832052..78c1153d 100644 --- a/envoyfilter/src/lib.rs +++ b/envoyfilter/src/lib.rs @@ -7,6 +7,7 @@ mod filter_context; mod ratelimit; mod stats; mod stream_context; +mod tokenizer; proxy_wasm::main! {{ proxy_wasm::set_log_level(LogLevel::Trace); diff --git a/envoyfilter/src/ratelimit.rs b/envoyfilter/src/ratelimit.rs index 2e1bb946..b1093b3e 100644 --- a/envoyfilter/src/ratelimit.rs +++ b/envoyfilter/src/ratelimit.rs @@ -1,4 +1,5 @@ use governor::{DefaultKeyedRateLimiter, InsufficientCapacity, Quota}; +use log::debug; use public_types::configuration; use public_types::configuration::{Limit, Ratelimit, TimeUnit}; use std::num::{NonZero, NonZeroU32}; @@ -28,9 +29,10 @@ pub struct RatelimitMap { // This version of Header demands that the user passes a header value to match on. #[allow(unused)] +#[derive(Debug)] pub struct Header { - key: String, - value: String, + pub key: String, + pub value: String, } impl Header { @@ -84,6 +86,11 @@ impl RatelimitMap { selector: Header, tokens_used: NonZeroU32, ) -> Result<(), String> { + debug!( + "Checking limit for provider={}, with selector={:?}, consuming tokens={:?}", + provider, selector, tokens_used + ); + let provider_limits = match self.datastore.get(&provider) { None => { // No limit configured for this provider, hence ok. diff --git a/envoyfilter/src/stats.rs b/envoyfilter/src/stats.rs index 693f24b5..26755469 100644 --- a/envoyfilter/src/stats.rs +++ b/envoyfilter/src/stats.rs @@ -74,7 +74,10 @@ impl Metric for Gauge { } } +/// For state of the world updates impl RecordingMetric for Gauge {} +/// For offset deltas +impl IncrementingMetric for Gauge {} #[derive(Copy, Clone)] pub struct Histogram { diff --git a/envoyfilter/src/stream_context.rs b/envoyfilter/src/stream_context.rs index dd36b4b4..0a751ec5 100644 --- a/envoyfilter/src/stream_context.rs +++ b/envoyfilter/src/stream_context.rs @@ -1,11 +1,14 @@ use crate::consts::{ DEFAULT_COLLECTION_NAME, DEFAULT_EMBEDDING_MODEL, DEFAULT_NER_MODEL, DEFAULT_NER_THRESHOLD, - DEFAULT_PROMPT_TARGET_THRESHOLD, SYSTEM_ROLE, USER_ROLE, + DEFAULT_PROMPT_TARGET_THRESHOLD, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, USER_ROLE, }; +use crate::filter_context::WasmMetrics; +use crate::ratelimit; +use crate::ratelimit::Header; +use crate::stats::IncrementingMetric; +use crate::tokenizer; use http::StatusCode; -use log::error; -use log::info; -use log::warn; +use log::{debug, error, info, warn}; use open_message_format_embeddings::models::{ CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, }; @@ -17,6 +20,8 @@ use public_types::common_types::{ }; use public_types::configuration::{Entity, PromptTarget}; use std::collections::HashMap; +use std::num::NonZero; +use std::rc::Rc; use std::time::Duration; enum RequestType { @@ -35,7 +40,9 @@ pub struct CallContext { pub struct StreamContext { pub host_header: Option, + pub ratelimit_selector: Option
, pub callouts: HashMap, + pub metrics: Rc, } impl StreamContext { @@ -65,6 +72,15 @@ impl StreamContext { } } + fn save_ratelimit_header(&mut self) { + self.ratelimit_selector = self + .get_http_request_header(RATELIMIT_SELECTOR_HEADER_KEY) + .and_then(|key| { + self.get_http_request_header(&key) + .map(|value| Header { key, value }) + }); + } + fn embeddings_handler(&mut self, body: Vec, mut callout_context: CallContext) { let embedding_response: CreateEmbeddingResponse = match serde_json::from_slice(&body) { Ok(embedding_response) => embedding_response, @@ -115,6 +131,7 @@ impl StreamContext { if self.callouts.insert(token_id, callout_context).is_some() { panic!("duplicate token_id") } + self.metrics.active_http_calls.increment(1); } fn search_points_handler(&mut self, body: Vec, mut callout_context: CallContext) { @@ -202,6 +219,7 @@ impl StreamContext { if self.callouts.insert(token_id, callout_context).is_some() { panic!("duplicate token_id") } + self.metrics.active_http_calls.increment(1); } fn ner_handler(&mut self, body: Vec, mut callout_context: CallContext) { @@ -290,10 +308,11 @@ impl StreamContext { if self.callouts.insert(token_id, callout_context).is_some() { panic!("duplicate token_id") } + self.metrics.active_http_calls.increment(1); } fn context_resolver_handler(&mut self, body: Vec, callout_context: CallContext) { - info!("response received for context_resolver"); + debug!("response received for context_resolver"); let body_string = String::from_utf8(body); let prompt_target = callout_context.prompt_target.unwrap(); let mut request_body = callout_context.request_body; @@ -331,7 +350,30 @@ impl StreamContext { return; } }; - info!("sending request to openai: msg {}", json_string); + + // Tokenize and Ratelimit. + if let Some(selector) = self.ratelimit_selector.take() { + if let Ok(token_count) = tokenizer::token_count(&request_body.model, &json_string) { + match ratelimit::ratelimits(None).read().unwrap().check_limit( + request_body.model, + selector, + NonZero::new(token_count as u32).unwrap(), + ) { + Ok(_) => (), + Err(err) => { + self.send_http_response( + StatusCode::TOO_MANY_REQUESTS.as_u16().into(), + vec![], + Some(format!("Exceeded Ratelimit: {}", err).as_bytes()), + ); + self.metrics.ratelimited_rq.increment(1); + return; + } + } + } + } + + debug!("sending request to openai: msg {}", json_string); self.set_http_request_body(0, json_string.len(), &json_string.into_bytes()); self.resume_http_request(); } @@ -345,6 +387,7 @@ impl HttpContext for StreamContext { self.save_host_header(); self.delete_content_length_header(); self.modify_path_header(); + self.save_ratelimit_header(); Action::Continue } @@ -450,6 +493,7 @@ impl HttpContext for StreamContext { token_id ) } + self.metrics.active_http_calls.increment(1); Action::Pause } @@ -464,6 +508,7 @@ impl Context for StreamContext { _num_trailers: usize, ) { let callout_context = self.callouts.remove(&token_id).expect("invalid token_id"); + self.metrics.active_http_calls.increment(-1); let resp = self.get_http_call_response_body(0, body_size); diff --git a/envoyfilter/src/tokenizer.rs b/envoyfilter/src/tokenizer.rs new file mode 100644 index 00000000..25ac924e --- /dev/null +++ b/envoyfilter/src/tokenizer.rs @@ -0,0 +1,39 @@ +use log::debug; + +#[derive(Debug, PartialEq, Eq)] +#[allow(dead_code)] +pub enum Error { + UnknownModel, + FailedToTokenize, +} + +#[allow(dead_code)] +pub fn token_count(model_name: &str, text: &str) -> Result { + debug!("getting token count model={}", model_name); + // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton? + let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel)?; + Ok(bpe.encode_ordinary(text).len()) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn encode_ordinary() { + let model_name = "gpt-3.5-turbo"; + let text = "How many tokens does this sentence have?"; + assert_eq!( + 8, + token_count(model_name, text).expect("correct tokenization") + ); + } + + #[test] + fn unrecognized_model() { + assert_eq!( + Error::UnknownModel, + token_count("unknown", "").expect_err("unknown model") + ) + } +} diff --git a/envoyfilter/tests/integration.rs b/envoyfilter/tests/integration.rs index 5000312a..71cd550e 100644 --- a/envoyfilter/tests/integration.rs +++ b/envoyfilter/tests/integration.rs @@ -1,8 +1,20 @@ use http::StatusCode; -use proxy_wasm_test_framework::tester; -use proxy_wasm_test_framework::types::{Action, BufferType, MapType, MetricType, ReturnType}; -use public_types::common_types::Entity; +use open_message_format_embeddings::models::{ + create_embedding_response::{self, CreateEmbeddingResponse}, + create_embedding_response_usage::CreateEmbeddingResponseUsage, + embedding, Embedding, +}; +use proxy_wasm_test_framework::tester::{self, Tester}; +use proxy_wasm_test_framework::types::{ + Action, BufferType, LogLevel, MapType, MetricType, ReturnType, +}; +use public_types::configuration::{self, Endpoint, PromptTarget}; +use public_types::{ + common_types::{self, NERResponse, SearchPointResult, SearchPointsResponse}, + configuration::Configuration, +}; use serial_test::serial; +use std::collections::HashMap; use std::path::Path; fn wasm_module() -> String { @@ -14,6 +26,223 @@ fn wasm_module() -> String { wasm_file.to_str().unwrap().to_string() } +fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { + module + .call_proxy_on_context_create(http_context, filter_context) + .execute_and_expect(ReturnType::None) + .unwrap(); + + // Request Headers + module + .call_proxy_on_request_headers(http_context, 0, false) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host")) + .returning(Some("api.openai.com")) + .expect_add_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("content-length"), + Some(""), + ) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) + .returning(Some("/llmrouting")) + .expect_add_header_map_value( + Some(MapType::HttpRequestHeaders), + Some(":path"), + Some("/v1/chat/completions"), + ) + .expect_get_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-katanemo-ratelimit-selector"), + ) + .returning(Some("selector-key")) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key")) + .returning(Some("selector-value")) + .execute_and_expect(ReturnType::Action(Action::Continue)) + .unwrap(); + + // Request Body + let chat_completions_request_body = "\ +{\ + \"messages\": [\ + {\ + \"role\": \"system\",\ + \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\ + },\ + {\ + \"role\": \"user\",\ + \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ + }\ + ],\ + \"model\": \"gpt-4\"\ +}"; + + module + .call_proxy_on_request_body( + http_context, + chat_completions_request_body.len() as i32, + true, + ) + .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) + .returning(Some(chat_completions_request_body)) + // The actual call is not important in this test, we just need to grab the token_id + .expect_http_call(Some("embeddingserver"), None, None, None, None) + .returning(Some(1)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::Action(Action::Pause)) + .unwrap(); + + let embedding_response = CreateEmbeddingResponse { + data: vec![Embedding { + index: 0, + embedding: vec![], + object: embedding::Object::default(), + }], + model: String::from("test"), + object: create_embedding_response::Object::default(), + usage: Box::new(CreateEmbeddingResponseUsage::new(0, 0)), + }; + let embeddings_response_buffer = serde_json::to_string(&embedding_response).unwrap(); + module + .call_proxy_on_http_call_response( + http_context, + 1, + 0, + embeddings_response_buffer.len() as i32, + 0, + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&embeddings_response_buffer)) + .expect_http_call(Some("qdrant"), None, None, None, None) + .returning(Some(2)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let prompt_target = PromptTarget { + name: String::from("test-prompt-target"), + prompt_type: String::from("test-prompt-type"), + few_shot_examples: vec![], + entities: Some(vec![configuration::Entity { + name: String::from("test-entity"), + required: Some(true), + description: None, + }]), + endpoint: Some(Endpoint { + cluster: String::from("test-endpoint-cluster"), + path: None, + method: None, + }), + system_prompt: None, + }; + let prompt_target_str = serde_json::to_string(&prompt_target).unwrap(); + let search_points_response = SearchPointsResponse { + status: String::new(), + time: 0.0, + result: vec![SearchPointResult { + id: String::new(), + version: 0, + score: 0.7, + payload: HashMap::from([(String::from("prompt-target"), prompt_target_str)]), + }], + }; + let search_points_response_buffer = serde_json::to_string(&search_points_response).unwrap(); + module + .call_proxy_on_http_call_response( + http_context, + 2, + 0, + search_points_response_buffer.len() as i32, + 0, + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&search_points_response_buffer)) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Info), None) + .expect_http_call(Some("nerhost"), None, None, None, None) + .returning(Some(3)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let ner_reponse = NERResponse { + model: String::from("test-model"), + data: vec![common_types::Entity { + score: 0.7, + text: String::from("test-text"), + label: String::from("test-entity"), + }], + }; + let ner_response_buffer = serde_json::to_string(&ner_reponse).unwrap(); + let upstream_name = prompt_target.endpoint.unwrap().cluster.leak(); + module + .call_proxy_on_http_call_response(http_context, 3, 0, ner_response_buffer.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&ner_response_buffer)) + .expect_log(Some(LogLevel::Info), None) + .expect_http_call(Some(upstream_name), None, None, None, None) + .returning(Some(4)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap() +} + +fn default_config() -> Configuration { + let config: &str = r#" +default_prompt_endpoint: "127.0.0.1" +load_balancing: "round_robin" +timeout_ms: 5000 + +embedding_provider: + name: "SentenceTransformer" + model: "all-MiniLM-L6-v2" + +llm_providers: + - name: "open-ai-gpt-4" + api_key: "$OPEN_AI_API_KEY" + model: gpt-4 + +system_prompt: | + You are a helpful weather forecaster. Please following following guidelines when responding to user queries: + - Use farenheight for temperature + - Use miles per hour for wind speed + +prompt_targets: + - type: context_resolver + name: weather_forecast + few_shot_examples: + - what is the weather in New York? + endpoint: + cluster: weatherhost + path: /weather + entities: + - name: location + required: true + description: "The location for which the weather is requested" + + - type: context_resolver + name: weather_forecast_2 + few_shot_examples: + - what is the weather in New York? + endpoint: + cluster: weatherhost + path: /weather + entities: + - name: city + +ratelimits: + - provider: gpt-4 + selector: + key: selector-key + value: selector-value + limit: + tokens: 1 + unit: minute + "#; + serde_yaml::from_str(config).unwrap() +} + #[test] #[serial] fn successful_request_to_open_ai_chat_completions() { @@ -35,6 +264,7 @@ fn successful_request_to_open_ai_chat_completions() { module .call_proxy_on_context_create(root_context, 0) .expect_metric_creation(MetricType::Gauge, "active_http_calls") + .expect_metric_creation(MetricType::Counter, "ratelimited_rq") .execute_and_expect(ReturnType::None) .unwrap(); @@ -63,6 +293,11 @@ fn successful_request_to_open_ai_chat_completions() { Some(":path"), Some("/v1/chat/completions"), ) + .expect_get_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-katanemo-ratelimit-selector"), + ) + .returning(None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -78,7 +313,8 @@ fn successful_request_to_open_ai_chat_completions() { \"role\": \"user\",\ \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ }\ - ]\ + ],\ + \"model\": \"gpt-4\"\ }"; module @@ -91,6 +327,7 @@ fn successful_request_to_open_ai_chat_completions() { .returning(Some(chat_completions_request_body)) // TODO: assert that the model field was added. .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) + .expect_metric_increment("active_http_calls", 1) .execute_and_expect(ReturnType::Action(Action::Pause)) .unwrap(); } @@ -116,6 +353,7 @@ fn bad_request_to_open_ai_chat_completions() { module .call_proxy_on_context_create(root_context, 0) .expect_metric_creation(MetricType::Gauge, "active_http_calls") + .expect_metric_creation(MetricType::Counter, "ratelimited_rq") .execute_and_expect(ReturnType::None) .unwrap(); @@ -144,6 +382,11 @@ fn bad_request_to_open_ai_chat_completions() { Some(":path"), Some("/v1/chat/completions"), ) + .expect_get_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-katanemo-ratelimit-selector"), + ) + .returning(None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -181,15 +424,7 @@ fn bad_request_to_open_ai_chat_completions() { #[test] #[serial] -fn delete_me_in_next_pr_successful_request_to_open_ai_chat_completions() { - let ner_response = Entity { - score: 0.7, - text: String::from("hello"), - label: String::from("hello"), - }; - let ner_response_buffer = serde_json::to_string(&ner_response).unwrap(); - println!("{} is my length", ner_response_buffer.len()); - +fn request_ratelimited() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -203,67 +438,102 @@ fn delete_me_in_next_pr_successful_request_to_open_ai_chat_completions() { .unwrap(); // Setup Filter - let root_context = 1; + let filter_context = 1; + let config = serde_json::to_string(&default_config()).unwrap(); module - .call_proxy_on_context_create(root_context, 0) + .call_proxy_on_context_create(filter_context, 0) .expect_metric_creation(MetricType::Gauge, "active_http_calls") + .expect_metric_creation(MetricType::Counter, "ratelimited_rq") .execute_and_expect(ReturnType::None) .unwrap(); + module + .call_proxy_on_configure(filter_context, config.len() as i32) + .expect_log(Some(LogLevel::Debug), None) + .expect_get_buffer_bytes(Some(BufferType::PluginConfiguration)) + .returning(Some(&config)) + .execute_and_expect(ReturnType::Bool(true)) + .unwrap(); // Setup HTTP Stream let http_context = 2; + normal_flow(&mut module, filter_context, http_context); + + let test_body = "test body"; module - .call_proxy_on_context_create(http_context, root_context) + .call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(test_body)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_send_local_response( + Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), + None, + None, + None, + ) + .expect_metric_increment("ratelimited_rq", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); +} + +#[test] +#[serial] +fn request_not_ratelimited() { + let args = tester::MockSettings { + wasm_path: wasm_module(), + quiet: false, + allow_unexpected: false, + }; + let mut module = tester::mock(args).unwrap(); + + module + .call_start() .execute_and_expect(ReturnType::None) .unwrap(); - // Request Headers + // Setup Filter + let filter_context = 1; + + let mut config = default_config(); + config.ratelimits.as_mut().unwrap()[0].limit.tokens += 1000; + let config_str = serde_json::to_string(&config).unwrap(); + module - .call_proxy_on_request_headers(http_context, 0, false) - .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":host")) - .returning(Some("api.openai.com")) - .expect_add_header_map_value( - Some(MapType::HttpRequestHeaders), - Some("content-length"), - Some(""), - ) - .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) - .returning(Some("/llmrouting")) - .expect_add_header_map_value( - Some(MapType::HttpRequestHeaders), - Some(":path"), - Some("/v1/chat/completions"), - ) - .execute_and_expect(ReturnType::Action(Action::Continue)) + .call_proxy_on_context_create(filter_context, 0) + .expect_metric_creation(MetricType::Gauge, "active_http_calls") + .expect_metric_creation(MetricType::Counter, "ratelimited_rq") + .execute_and_expect(ReturnType::None) + .unwrap(); + module + .call_proxy_on_configure(filter_context, config_str.len() as i32) + .expect_log(Some(LogLevel::Debug), None) + .expect_get_buffer_bytes(Some(BufferType::PluginConfiguration)) + .returning(Some(&config_str)) + .execute_and_expect(ReturnType::Bool(true)) .unwrap(); - // Request Body - let chat_completions_request_body = "\ - {\ - \"messages\": [\ - {\ - \"role\": \"system\",\ - \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\ - },\ - {\ - \"role\": \"user\",\ - \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ - }\ - ]\ - }"; + // Setup HTTP Stream + let http_context = 2; + normal_flow(&mut module, filter_context, http_context); + + let test_body = "test body"; module - .call_proxy_on_request_body( - http_context, - chat_completions_request_body.len() as i32, - true, - ) - .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) - .returning(Some(chat_completions_request_body)) - // TODO: assert that the model field was added. + .call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(test_body)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) - .execute_and_expect(ReturnType::Action(Action::Pause)) + .execute_and_expect(ReturnType::None) .unwrap(); }