diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..b4010f80 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +crates/*/target* diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 666981bd..d846666a 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -12,13 +12,24 @@ jobs: steps: - name: Setup | Checkout uses: actions/checkout@v4 + - name: Setup | Rust run: rustup toolchain install stable --profile minimal + - name: Setup | Install wasm toolchain run: rustup target add wasm32-wasi - - name: Build wasm module - run: cd arch && cargo build --release --target=wasm32-wasi - - name: Run Tests on arch - run: cd arch && cargo test - - name: Run Tests on public_types - run: cd public_types && cargo test + + - name: Build wasm module for prompt_gateway + run: cd crates/prompt_gateway && cargo build --release --target=wasm32-wasi + + - name: Run Tests on common crate + run: cd crates/common && cargo test + + - name: Run Tests on prompt_gateway crate + run: cd crates/prompt_gateway && cargo test + + - name: Build wasm module for llm_gateway + run: cd crates/llm_gateway && cargo build --release --target=wasm32-wasi + + - name: Run Tests on llm_gateway crate + run: cd crates/llm_gateway && cargo test diff --git a/.gitignore b/.gitignore index 08f9462c..be5b849c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ -arch/target arch/qdrant_data/ -public_types/target /venv/ __pycache__ grafana-data @@ -31,3 +29,4 @@ model_server/build model_server/dist arch_logs/ dist/ +crates/*/target/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 133f9ff8..868c7548 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,23 +8,27 @@ repos: - id: trailing-whitespace - repo: local hooks: + - id: cargo-fmt name: cargo-fmt language: system types: [file, rust] - entry: bash -c "cd arch && cargo fmt -p intelligent-prompt-gateway -- --check" + entry: bash -c "cd 
crates/llm_gateway && cargo fmt -- --check" + - id: cargo-clippy name: cargo-clippy language: system types: [file, rust] - entry: bash -c "cd arch && cargo clippy -p intelligent-prompt-gateway --all" + entry: bash -c "cd crates/llm_gateway && cargo clippy --all" + - id: cargo-test name: cargo-test language: system types: [file, rust] # --lib is to only test the library, since when integration tests are made, # they will be in a seperate tests directory - entry: bash -c "cd arch && cargo test -p intelligent-prompt-gateway --lib" + entry: bash -c "cd crates/llm_gateway && cargo test --lib" + - repo: https://github.com/psf/black rev: 23.1.0 hooks: diff --git a/arch/Dockerfile b/arch/Dockerfile index bca0d9ac..3a875a62 100644 --- a/arch/Dockerfile +++ b/arch/Dockerfile @@ -2,19 +2,18 @@ FROM rust:1.80.0 as builder RUN rustup -v target add wasm32-wasi WORKDIR /arch -COPY arch/src /arch/src -COPY arch/Cargo.toml /arch/ -COPY arch/Cargo.lock /arch/ -COPY public_types /public_types +COPY crates . -RUN cargo build --release --target wasm32-wasi +RUN cd prompt_gateway && cargo build --release --target wasm32-wasi +RUN cd llm_gateway && cargo build --release --target wasm32-wasi # copy built filter into envoy image FROM envoyproxy/envoy:v1.31-latest as envoy #Build config generator, so that we have a single build image for both Rust and Python FROM python:3-slim as arch -COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm +COPY --from=builder /arch/prompt_gateway/target/wasm32-wasi/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm +COPY --from=builder /arch/llm_gateway/target/wasm32-wasi/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy WORKDIR /config COPY arch/requirements.txt . 
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 070aa919..300d71d1 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -90,7 +90,7 @@ static_resources: runtime: "envoy.wasm.runtime.v8" code: local: - filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm" + filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm" - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router @@ -250,7 +250,7 @@ static_resources: runtime: "envoy.wasm.runtime.v8" code: local: - filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm" + filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm" - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router diff --git a/arch/grafana/datasource.yaml b/arch/grafana/datasource.yaml deleted file mode 100644 index 4870174e..00000000 --- a/arch/grafana/datasource.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: 1 - -datasources: -- name: Prometheus - type: prometheus - url: http://prometheus:9090 - isDefault: true - access: proxy - editable: true diff --git a/arch/prometheus/prometheus.yaml b/arch/prometheus/prometheus.yaml deleted file mode 100644 index 5aa25e0d..00000000 --- a/arch/prometheus/prometheus.yaml +++ /dev/null @@ -1,23 +0,0 @@ -global: - scrape_interval: 15s - scrape_timeout: 10s - evaluation_interval: 15s -alerting: - alertmanagers: - - static_configs: - - targets: [] - scheme: http - timeout: 10s - api_version: v1 -scrape_configs: -- job_name: envoy - honor_timestamps: true - scrape_interval: 15s - scrape_timeout: 10s - metrics_path: /stats - scheme: http - static_configs: - - targets: - - envoy:9901 - params: - format: ['prometheus'] diff --git a/crates/common/Cargo.lock b/crates/common/Cargo.lock new file mode 100644 index 00000000..8bdd2dec --- /dev/null +++ b/crates/common/Cargo.lock @@ -0,0 +1,668 @@ +# This file is 
automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "anyhow" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "derivative", + "duration-string", + "governor", + "log", + "pretty_assertions", + "proxy-wasm", + "rand", + "serde", + "serde_json", + "serde_yaml", + "thiserror", + "tiktoken-rs", +] + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "duration-string" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fcc1d9ae294a15ed05aeae8e11ee5f2b3fe971c077d45a42fb20825fba6ee13" +dependencies = [ + "serde", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "fancy-regex" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +dependencies = [ + "bit-set", + "regex", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "governor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a7f542ee6b35af73b06abc0dad1c1bae89964e4e253bc4b587b91c9637867b" +dependencies = [ + "cfg-if", + "no-std-compat", + "nonzero_ext", + "portable-atomic", + "smallvec", + "spinning_top", +] + +[[package]] +name = "hashbrown" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25" +dependencies = [ + "ahash 0.3.8", + "autocfg", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "indexmap" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" +dependencies = [ + "hashbrown 0.8.2", +] + +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proxy-wasm" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a5a4df5a1ab77235e36a0a0f638687ee1586d21ee9774037693001e94d4e11" +dependencies = [ + "hashbrown 0.14.5", + "log", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.210" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "serde_json" +version = "1.0.128" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spinning_top" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" +dependencies = [ + "lock_api", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = 
"thiserror" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "tiktoken-rs" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +dependencies = [ + "anyhow", + "base64", + "bstr", + "fancy-regex", + "lazy_static", + "parking_lot", + "rustc-hash", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + 
"windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version 
= "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] diff --git a/public_types/Cargo.toml b/crates/common/Cargo.toml similarity index 56% rename from public_types/Cargo.toml rename to crates/common/Cargo.toml index 94a1725d..a362da9c 100644 --- a/public_types/Cargo.toml +++ b/crates/common/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "public_types" +name = "common" version = "0.1.0" edition = "2021" @@ -7,6 +7,13 @@ edition = "2021" serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.9.34" duration-string = { version = "0.3.0", features = ["serde"] } +proxy-wasm = "0.2.1" +governor = { version = "0.6.3", default-features = false, features = ["no_std"]} +log = "0.4" +derivative = "2.2.0" +thiserror = "1.0.64" +tiktoken-rs = "0.5.9" +rand = "0.8.5" [dev-dependencies] pretty_assertions = "1.4.1" diff --git a/public_types/src/common_types.rs b/crates/common/src/common_types.rs similarity index 100% rename from public_types/src/common_types.rs rename to crates/common/src/common_types.rs diff --git a/public_types/src/configuration.rs b/crates/common/src/configuration.rs similarity index 95% rename from public_types/src/configuration.rs rename to crates/common/src/configuration.rs index 74f58ab0..63ab156c 100644 --- a/public_types/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -1,5 +1,6 @@ use duration_string::DurationString; use serde::{Deserialize, Deserializer, Serialize}; +use std::default; use std::fmt::Display; use std::{collections::HashMap, time::Duration}; @@ -13,20 +14,15 @@ pub struct Tracing { pub 
sampling_rate: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] pub enum GatewayMode { #[serde(rename = "llm")] Llm, + #[default] #[serde(rename = "prompt")] Prompt, } -impl Default for GatewayMode { - fn default() -> Self { - GatewayMode::Prompt - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Configuration { pub version: String, @@ -225,9 +221,10 @@ mod test { #[test] fn test_deserialize_configuration() { - let ref_config = - fs::read_to_string("../docs/source/resources/includes/arch_config_full_reference.yaml") - .expect("reference config file not found"); + let ref_config = fs::read_to_string( + "../../docs/source/resources/includes/arch_config_full_reference.yaml", + ) + .expect("reference config file not found"); let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap(); assert_eq!(config.version, "v0.1"); @@ -299,10 +296,7 @@ mod test { let tracing = config.tracing.as_ref().unwrap(); assert_eq!(tracing.sampling_rate.unwrap(), 0.1); - let mode = config - .mode - .as_ref() - .unwrap_or(&super::GatewayMode::Prompt); + let mode = config.mode.as_ref().unwrap_or(&super::GatewayMode::Prompt); assert_eq!(*mode, super::GatewayMode::Prompt); } } diff --git a/arch/src/consts.rs b/crates/common/src/consts.rs similarity index 100% rename from arch/src/consts.rs rename to crates/common/src/consts.rs diff --git a/public_types/src/embeddings/create_embedding_request.rs b/crates/common/src/embeddings/create_embedding_request.rs similarity index 100% rename from public_types/src/embeddings/create_embedding_request.rs rename to crates/common/src/embeddings/create_embedding_request.rs diff --git a/public_types/src/embeddings/create_embedding_request_input.rs b/crates/common/src/embeddings/create_embedding_request_input.rs similarity index 100% rename from public_types/src/embeddings/create_embedding_request_input.rs rename to 
crates/common/src/embeddings/create_embedding_request_input.rs diff --git a/public_types/src/embeddings/create_embedding_response.rs b/crates/common/src/embeddings/create_embedding_response.rs similarity index 100% rename from public_types/src/embeddings/create_embedding_response.rs rename to crates/common/src/embeddings/create_embedding_response.rs diff --git a/public_types/src/embeddings/create_embedding_response_usage.rs b/crates/common/src/embeddings/create_embedding_response_usage.rs similarity index 100% rename from public_types/src/embeddings/create_embedding_response_usage.rs rename to crates/common/src/embeddings/create_embedding_response_usage.rs diff --git a/public_types/src/embeddings/embedding.rs b/crates/common/src/embeddings/embedding.rs similarity index 100% rename from public_types/src/embeddings/embedding.rs rename to crates/common/src/embeddings/embedding.rs diff --git a/public_types/src/embeddings/mod.rs b/crates/common/src/embeddings/mod.rs similarity index 100% rename from public_types/src/embeddings/mod.rs rename to crates/common/src/embeddings/mod.rs diff --git a/arch/src/http.rs b/crates/common/src/http.rs similarity index 100% rename from arch/src/http.rs rename to crates/common/src/http.rs diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs new file mode 100644 index 00000000..27a51803 --- /dev/null +++ b/crates/common/src/lib.rs @@ -0,0 +1,12 @@ +#![allow(unused_imports)] + +pub mod common_types; +pub mod configuration; +pub mod consts; +pub mod embeddings; +pub mod http; +pub mod llm_providers; +pub mod ratelimit; +pub mod routing; +pub mod stats; +pub mod tokenizer; diff --git a/arch/src/llm_providers.rs b/crates/common/src/llm_providers.rs similarity index 97% rename from arch/src/llm_providers.rs rename to crates/common/src/llm_providers.rs index 65cd0d04..8214f148 100644 --- a/arch/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -1,4 +1,4 @@ -use public_types::configuration::LlmProvider; +use 
crate::configuration::LlmProvider; use std::collections::HashMap; use std::rc::Rc; diff --git a/arch/src/ratelimit.rs b/crates/common/src/ratelimit.rs similarity index 99% rename from arch/src/ratelimit.rs rename to crates/common/src/ratelimit.rs index 83a85e6c..66c3facd 100644 --- a/arch/src/ratelimit.rs +++ b/crates/common/src/ratelimit.rs @@ -1,7 +1,7 @@ +use crate::configuration; +use configuration::{Limit, Ratelimit, TimeUnit}; use governor::{DefaultKeyedRateLimiter, InsufficientCapacity, Quota}; use log::debug; -use public_types::configuration; -use public_types::configuration::{Limit, Ratelimit, TimeUnit}; use std::fmt::Display; use std::num::{NonZero, NonZeroU32}; use std::sync::RwLock; @@ -398,9 +398,10 @@ fn different_provider_can_have_different_limits_with_the_same_keys() { // If more tests are written here, move the initial call out of the test. #[cfg(test)] mod test { + use crate::configuration; + use super::ratelimits; use configuration::{Limit, Ratelimit, TimeUnit}; - use public_types::configuration; use std::num::NonZero; use std::thread; diff --git a/arch/src/routing.rs b/crates/common/src/routing.rs similarity index 93% rename from arch/src/routing.rs rename to crates/common/src/routing.rs index a372537e..1a440ee9 100644 --- a/arch/src/routing.rs +++ b/crates/common/src/routing.rs @@ -1,8 +1,8 @@ use std::rc::Rc; -use crate::llm_providers::LlmProviders; +use crate::{configuration, llm_providers::LlmProviders}; +use configuration::LlmProvider; use log::debug; -use public_types::configuration::LlmProvider; use rand::{seq::IteratorRandom, thread_rng}; #[derive(Debug)] diff --git a/arch/src/stats.rs b/crates/common/src/stats.rs similarity index 100% rename from arch/src/stats.rs rename to crates/common/src/stats.rs diff --git a/arch/src/tokenizer.rs b/crates/common/src/tokenizer.rs similarity index 100% rename from arch/src/tokenizer.rs rename to crates/common/src/tokenizer.rs diff --git a/arch/Cargo.lock b/crates/llm_gateway/Cargo.lock similarity 
index 99% rename from arch/Cargo.lock rename to crates/llm_gateway/Cargo.lock index 6e9a2e5d..35182863 100644 --- a/arch/Cargo.lock +++ b/crates/llm_gateway/Cargo.lock @@ -217,6 +217,22 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "derivative", + "duration-string", + "governor", + "log", + "proxy-wasm", + "rand", + "serde", + "serde_yaml", + "thiserror", + "tiktoken-rs", +] + [[package]] name = "cpp_demangle" version = "0.4.4" @@ -753,29 +769,6 @@ dependencies = [ "serde", ] -[[package]] -name = "intelligent-prompt-gateway" -version = "0.1.0" -dependencies = [ - "acap", - "derivative", - "governor", - "http", - "log", - "md5", - "proxy-wasm", - "proxy-wasm-test-framework", - "public_types", - "rand", - "serde", - "serde_json", - "serde_yaml", - "serial_test", - "sha2", - "thiserror", - "tiktoken-rs", -] - [[package]] name = "itertools" version = "0.12.1" @@ -860,6 +853,28 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "llm_gateway" +version = "0.1.0" +dependencies = [ + "acap", + "common", + "derivative", + "governor", + "http", + "log", + "md5", + "proxy-wasm", + "proxy-wasm-test-framework", + "rand", + "serde", + "serde_json", + "serde_yaml", + "serial_test", + "sha2", + "thiserror", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -1094,15 +1109,6 @@ dependencies = [ "cc", ] -[[package]] -name = "public_types" -version = "0.1.0" -dependencies = [ - "duration-string", - "serde", - "serde_yaml", -] - [[package]] name = "quote" version = "1.0.37" @@ -1197,9 +1203,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", @@ -1209,9 +1215,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -1220,9 +1226,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustc-demangle" diff --git a/arch/Cargo.toml b/crates/llm_gateway/Cargo.toml similarity index 85% rename from arch/Cargo.toml rename to crates/llm_gateway/Cargo.toml index 430703b8..73d62c3d 100644 --- a/arch/Cargo.toml +++ b/crates/llm_gateway/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "intelligent-prompt-gateway" +name = "llm_gateway" version = "0.1.0" authors = ["Katanemo Inc "] edition = "2021" @@ -14,10 +14,9 @@ serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.9.34" serde_json = "1.0" md5 = "0.7.0" -public_types = { path = "../public_types" } +common = { path = "../common" } http = "1.1.0" governor = { version = "0.6.3", default-features = false, features = ["no_std"]} -tiktoken-rs = "0.5.9" acap = "0.3.0" rand = "0.8.5" thiserror = "1.0.64" diff --git a/arch/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs similarity index 94% rename from arch/src/filter_context.rs rename to crates/llm_gateway/src/filter_context.rs index ff6342b3..5d0090a7 100644 --- a/arch/src/filter_context.rs +++ 
b/crates/llm_gateway/src/filter_context.rs @@ -1,22 +1,23 @@ -use crate::consts::{ - ARCH_INTERNAL_CLUSTER_NAME, ARCH_UPSTREAM_HOST_HEADER, DEFAULT_EMBEDDING_MODEL, - MODEL_SERVER_NAME, -}; -use crate::http::{CallArgs, Client}; -use crate::llm_providers::LlmProviders; -use crate::ratelimit; -use crate::stats::{Counter, Gauge, IncrementingMetric}; use crate::stream_context::StreamContext; +use common::common_types::EmbeddingType; +use common::configuration::{Configuration, GatewayMode, Overrides, PromptGuards, PromptTarget}; +use common::consts::ARCH_INTERNAL_CLUSTER_NAME; +use common::consts::ARCH_UPSTREAM_HOST_HEADER; +use common::consts::DEFAULT_EMBEDDING_MODEL; +use common::consts::MODEL_SERVER_NAME; +use common::embeddings::{ + CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, +}; +use common::http::CallArgs; +use common::http::Client; +use common::llm_providers::LlmProviders; +use common::ratelimit; +use common::stats::Counter; +use common::stats::Gauge; +use common::stats::IncrementingMetric; use log::debug; use proxy_wasm::traits::*; use proxy_wasm::types::*; -use public_types::common_types::EmbeddingType; -use public_types::configuration::{ - Configuration, GatewayMode, Overrides, PromptGuards, PromptTarget, -}; -use public_types::embeddings::{ - CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, -}; use std::cell::RefCell; use std::collections::hash_map::Entry; use std::collections::HashMap; diff --git a/arch/src/lib.rs b/crates/llm_gateway/src/lib.rs similarity index 76% rename from arch/src/lib.rs rename to crates/llm_gateway/src/lib.rs index 8d8c0b90..e2ad9025 100644 --- a/arch/src/lib.rs +++ b/crates/llm_gateway/src/lib.rs @@ -2,15 +2,8 @@ use filter_context::FilterContext; use proxy_wasm::traits::*; use proxy_wasm::types::*; -mod consts; mod filter_context; -mod http; -mod llm_providers; -mod ratelimit; -mod routing; -mod stats; mod stream_context; -mod tokenizer; proxy_wasm::main! 
{{ proxy_wasm::set_log_level(LogLevel::Trace); diff --git a/arch/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs similarity index 98% rename from arch/src/stream_context.rs rename to crates/llm_gateway/src/stream_context.rs index 7a65609c..5e4e6149 100644 --- a/arch/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -1,4 +1,18 @@ -use crate::consts::{ +use crate::filter_context::{EmbeddingsStore, WasmMetrics}; +use acap::cos; +use common::common_types::open_ai::{ + ArchState, ChatCompletionChunkResponse, ChatCompletionTool, ChatCompletionsRequest, + ChatCompletionsResponse, Choice, FunctionDefinition, FunctionParameter, FunctionParameters, + Message, ParameterType, StreamOptions, ToolCall, ToolCallState, ToolType, +}; +use common::common_types::{ + EmbeddingType, HallucinationClassificationRequest, HallucinationClassificationResponse, + PromptGuardRequest, PromptGuardResponse, PromptGuardTask, ZeroShotClassificationRequest, + ZeroShotClassificationResponse, +}; +use common::configuration::{GatewayMode, LlmProvider}; +use common::configuration::{Overrides, PromptGuards, PromptTarget}; +use common::consts::{ ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME, ARCH_LLM_UPSTREAM_LISTENER, ARCH_MESSAGES_KEY, ARCH_MODEL_PREFIX, ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ARC_FC_CLUSTER, @@ -6,32 +20,18 @@ use crate::consts::{ DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, USER_ROLE, }; -use crate::filter_context::{EmbeddingsStore, WasmMetrics}; -use crate::http::{CallArgs, Client, ClientError}; -use crate::llm_providers::LlmProviders; -use crate::ratelimit::Header; -use crate::stats::IncrementingMetric; -use crate::{ratelimit, routing, tokenizer}; -use acap::cos; +use common::embeddings::{ + CreateEmbeddingRequest, CreateEmbeddingRequestInput, 
CreateEmbeddingResponse, +}; +use common::http::{CallArgs, Client, ClientError}; +use common::llm_providers::LlmProviders; +use common::ratelimit::Header; +use common::stats::Gauge; +use common::{ratelimit, routing, tokenizer}; use http::StatusCode; use log::{debug, info, warn}; use proxy_wasm::traits::*; use proxy_wasm::types::*; -use public_types::common_types::open_ai::{ - ArchState, ChatCompletionChunkResponse, ChatCompletionTool, ChatCompletionsRequest, - ChatCompletionsResponse, Choice, FunctionDefinition, FunctionParameter, FunctionParameters, - Message, ParameterType, StreamOptions, ToolCall, ToolCallState, ToolType, -}; -use public_types::common_types::{ - EmbeddingType, HallucinationClassificationRequest, HallucinationClassificationResponse, - PromptGuardRequest, PromptGuardResponse, PromptGuardTask, ZeroShotClassificationRequest, - ZeroShotClassificationResponse, -}; -use public_types::configuration::{GatewayMode, LlmProvider}; -use public_types::configuration::{Overrides, PromptGuards, PromptTarget}; -use public_types::embeddings::{ - CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, -}; use serde_json::Value; use sha2::{Digest, Sha256}; use std::cell::RefCell; @@ -40,6 +40,8 @@ use std::num::NonZero; use std::rc::Rc; use std::time::Duration; +use common::stats::IncrementingMetric; + #[derive(Debug, Clone)] enum ResponseHandlerType { GetEmbeddings, @@ -753,10 +755,8 @@ impl StreamContext { } } } - } else { - if let Some(user_message) = callout_context.user_message.as_ref() { - user_messages.push(user_message.clone()); - } + } else if let Some(user_message) = callout_context.user_message.as_ref() { + user_messages.push(user_message.clone()); } let user_messages_str = user_messages.join(", "); debug!("user messages: {}", user_messages_str); @@ -1280,7 +1280,7 @@ impl HttpContext for StreamContext { let prompt_guard_jailbreak_task = self .prompt_guards .input_guards - 
.contains_key(&public_types::configuration::GuardType::Jailbreak); + .contains_key(&common::configuration::GuardType::Jailbreak); self.chat_completions_request = Some(deserialized_body); @@ -1570,7 +1570,7 @@ impl Client for StreamContext { &self.callouts } - fn active_http_calls(&self) -> &crate::stats::Gauge { + fn active_http_calls(&self) -> &Gauge { &self.metrics.active_http_calls } } diff --git a/arch/tests/integration.rs b/crates/llm_gateway/tests/integration.rs similarity index 98% rename from arch/tests/integration.rs rename to crates/llm_gateway/tests/integration.rs index c628d9c3..5821a79a 100644 --- a/arch/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -1,23 +1,23 @@ +use common::common_types::open_ai::{ChatCompletionsResponse, Choice, Message, Usage}; +use common::common_types::open_ai::{FunctionCallDetail, ToolCall, ToolType}; +use common::common_types::{HallucinationClassificationResponse, PromptGuardResponse}; +use common::embeddings::{ + create_embedding_response, embedding, CreateEmbeddingResponse, CreateEmbeddingResponseUsage, + Embedding, +}; +use common::{common_types::ZeroShotClassificationResponse, configuration::Configuration}; use http::StatusCode; use proxy_wasm_test_framework::tester::{self, Tester}; use proxy_wasm_test_framework::types::{ Action, BufferType, LogLevel, MapType, MetricType, ReturnType, }; -use public_types::common_types::open_ai::{ChatCompletionsResponse, Choice, Message, Usage}; -use public_types::common_types::open_ai::{FunctionCallDetail, ToolCall, ToolType}; -use public_types::common_types::{HallucinationClassificationResponse, PromptGuardResponse}; -use public_types::embeddings::{ - create_embedding_response, embedding, CreateEmbeddingResponse, CreateEmbeddingResponseUsage, - Embedding, -}; -use public_types::{common_types::ZeroShotClassificationResponse, configuration::Configuration}; use serde_yaml::Value; use serial_test::serial; use std::collections::HashMap; use std::path::Path; fn 
wasm_module() -> String { - let wasm_file = Path::new("target/wasm32-wasi/release/intelligent_prompt_gateway.wasm"); + let wasm_file = Path::new("target/wasm32-wasi/release/llm_gateway.wasm"); assert!( wasm_file.exists(), "Run `cargo build --release --target=wasm32-wasi` first" diff --git a/crates/prompt_gateway/Cargo.lock b/crates/prompt_gateway/Cargo.lock new file mode 100644 index 00000000..63de3b3f --- /dev/null +++ b/crates/prompt_gateway/Cargo.lock @@ -0,0 +1,2165 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "acap" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6851a0b3b2d5729a0b7e61e3c36923ed9d72240146b0efda61121b0b84ad595d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "ansi_term" 
+version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "anyhow" +version = "1.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" + +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" + +[[package]] +name = "async-trait" +version = "0.1.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" + +[[package]] +name = "cc" +version = "1.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "derivative", + "duration-string", + "governor", + "log", + "proxy-wasm", + "rand", + "serde", + "serde_yaml", + "thiserror", + "tiktoken-rs", +] + +[[package]] +name = "cpp_demangle" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "cranelift-bforest" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "305d51c180ebdc46ef61bc60c54ae6512db3bc9a05842a1f1e762e45977019ab" +dependencies = [ + "cranelift-entity", +] + +[[package]] +name = "cranelift-bitset" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3247afacd9b13d620033f3190d9e49d1beefc1acb33d5604a249956c9c13709" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-codegen" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bd7ca95e831c18d1356da783765c344207cbdffea91e13e47fa9327dbb2e0719" +dependencies = [ + "bumpalo", + "cranelift-bforest", + "cranelift-bitset", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-control", + "cranelift-entity", + "cranelift-isle", + "gimli", + "hashbrown 0.14.5", + "log", + "regalloc2", + "rustc-hash", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "450c105fa1e51bfba4e95a86e926504a867ad5639d63f31d43fe3b7ec1f1c9ef" +dependencies = [ + "cranelift-codegen-shared", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5479117cd1266881479908d383086561cee37e49affbea9b1e6b594cc21cc220" + +[[package]] +name = "cranelift-control" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34378804f0abfdd22c068a741cfeed86938b92375b2a96fb0b42c878e0141bfb" +dependencies = [ + "arbitrary", +] + +[[package]] +name = "cranelift-entity" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a48cb0a194c9ba82fec35a1e492055388d89b2e3c03dee9dcf2488892be8004d" +dependencies = [ + "cranelift-bitset", + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-frontend" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8327afc6c1c05f4be62fefce5b439fa83521c65363a322e86ea32c85e7ceaf64" +dependencies = [ + "cranelift-codegen", + "log", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-isle" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56b08621c00321efcfa3eee6a3179adc009e21ea8d24ca7adc3c326184bc3f48" + +[[package]] +name = "cranelift-native" +version = "0.110.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51180b147c8557c1196c77b098f04140c91962e135ea152cd2fcabf40cf365c" +dependencies = [ + "cranelift-codegen", + "libc", + "target-lexicon", +] + +[[package]] +name = "cranelift-wasm" +version = "0.110.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "019e3dccb7f15e0bc14f0ddc034ec608a66df8e05c9e1e16f75a7716f8461799" +dependencies = [ + "cranelift-codegen", + "cranelift-entity", + "cranelift-frontend", + "itertools", + "log", + "smallvec", + "wasmparser", + "wasmtime-types", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" 
+dependencies = [ + "uuid", +] + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "directories-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339ee130d97a610ea5a5872d2bbb130fdf68884ff09d3028b81bec8a1ac23bbc" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "duration-string" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fcc1d9ae294a15ed05aeae8e11ee5f2b3fe971c077d45a42fb20825fba6ee13" +dependencies = [ + "serde", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fancy-regex" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +dependencies = [ + "bit-set", + "regex", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "fxprof-processed-profile" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" +dependencies = [ + "bitflags 2.6.0", + "debugid", + "fxhash", + "serde", + "serde_json", +] + +[[package]] +name = "generic-array" +version = "0.14.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + +[[package]] +name = "governor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a7f542ee6b35af73b06abc0dad1c1bae89964e4e253bc4b587b91c9637867b" +dependencies = [ + "cfg-if 1.0.0", + "no-std-compat", + "nonzero_ext", + "portable-atomic", + "smallvec", + "spinning_top", +] + +[[package]] +name = "hashbrown" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25" +dependencies = [ + "ahash 0.3.8", + "autocfg", +] + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash 0.8.11", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", + "serde", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "id-arena" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" + +[[package]] +name = "indexmap" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", + "serde", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "ittapi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b996fe614c41395cdaedf3cf408a9534851090959d90d54a535f675550b64b1" +dependencies = [ + "anyhow", + "ittapi-sys", + "log", +] + +[[package]] +name = "ittapi-sys" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5385394064fa2c886205dba02598013ce83d3e92d33dbdc0c52fe0e7bf4fc" +dependencies = [ + "cc", +] + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + +[[package]] +name = "libc" +version = "0.2.158" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.6.0", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "mach2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" +dependencies = [ + "libc", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memfd" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64" +dependencies = [ + "rustix", +] + +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" +dependencies = [ + "hashbrown 0.8.2", +] + +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.36.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" +dependencies = [ + "crc32fast", + "hashbrown 0.14.5", + "indexmap", + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "portable-atomic" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" + +[[package]] +name = "postcard" +version = "1.0.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prompt_gateway" +version = "0.1.0" +dependencies = [ + "acap", + "common", + "derivative", + "governor", + "http", + "log", + "md5", + "proxy-wasm", + "proxy-wasm-test-framework", + "rand", + "serde", + "serde_json", + "serde_yaml", + "serial_test", + "sha2", + "thiserror", +] + +[[package]] +name = "proxy-wasm" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a5a4df5a1ab77235e36a0a0f638687ee1586d21ee9774037693001e94d4e11" +dependencies = [ + "hashbrown 0.14.5", + "log", +] + +[[package]] +name = "proxy-wasm-test-framework" +version = "0.1.0" +source = 
"git+https://github.com/katanemo/test-framework.git?branch=new#c2511cd9030705e14d5f60aca77d6c96c81c6dfa" +dependencies = [ + "anyhow", + "cfg-if 0.1.10", + "lazy_static", + "more-asserts", + "rand", + "structopt", + "wasmtime", +] + +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + +[[package]] +name = "regalloc2" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6" +dependencies = [ + "hashbrown 0.13.2", + "log", + "rustc-hash", + "slice-group-by", + "smallvec", +] + +[[package]] +name = "regex" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.36" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scc" +version = "2.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c947adb109a8afce5fc9c7bf951f87f146e9147b3a6a58413105628fb1d1e66" +dependencies = [ + "sdd", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sdd" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a7b59a5d9b0099720b417b6325d91a52cbf5b3dcb5041d864be53eefa58abc" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "serde_json" +version = "1.0.128" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" 
+dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_spanned" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb5b1b31579f3811bf615c144393417496f152e12ac8b7663bf664f4a815306d" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "serial_test" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b4b487fe2acf240a021cf57c6b2b4903b1e78ca0ecd862a71b71d2a51fed77d" +dependencies = [ + "futures", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "slice-group-by" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +dependencies = [ + "serde", +] + +[[package]] +name = "spinning_top" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" +dependencies = [ + "lock_api", +] + +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = 
"syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "tiktoken-rs" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +dependencies = [ + "anyhow", + "base64", + "bstr", + "fancy-regex", + "lazy_static", + "parking_lot", + "rustc-hash", +] + +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + 
"toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + +[[package]] +name = "unicode-xid" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229730647fbc343e3a80e463c1db7f78f3855d3f3739bee0dda773c9a037c90a" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "uuid" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" + +[[package]] 
+name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-encoder" +version = "0.212.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501940df4418b8929eb6d52f1aade1fdd15a5b86c92453cb696e3c906bd3fc33" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasm-encoder" +version = "0.217.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b88b0814c9a2b323a9b46c687e726996c255ac8b64aa237dd11c81ed4854760" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasmparser" +version = "0.212.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d28bc49ba1e5c5b61ffa7a2eace10820443c4b7d1c0b144109261d14570fdf8" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.6.0", + "hashbrown 0.14.5", + "indexmap", + "semver", + "serde", +] + +[[package]] +name = "wasmprinter" +version = "0.212.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfac65326cc561112af88c3028f6dfdb140acff67ede33a8e86be2dc6b8956f7" +dependencies = [ + "anyhow", + "termcolor", + "wasmparser", +] + +[[package]] +name = "wasmtime" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07232e0b473af36112da7348f51e73fa8b11047a6cb546096da3812930b7c93a" +dependencies = [ + "addr2line", + "anyhow", + "async-trait", + "bitflags 2.6.0", + "bumpalo", + "cc", + "cfg-if 1.0.0", + 
"encoding_rs", + "fxprof-processed-profile", + "gimli", + "hashbrown 0.14.5", + "indexmap", + "ittapi", + "libc", + "libm", + "log", + "mach2", + "memfd", + "object", + "once_cell", + "paste", + "postcard", + "psm", + "rayon", + "rustix", + "semver", + "serde", + "serde_derive", + "serde_json", + "smallvec", + "sptr", + "target-lexicon", + "wasm-encoder 0.212.0", + "wasmparser", + "wasmtime-asm-macros", + "wasmtime-cache", + "wasmtime-component-macro", + "wasmtime-component-util", + "wasmtime-cranelift", + "wasmtime-environ", + "wasmtime-fiber", + "wasmtime-jit-debug", + "wasmtime-jit-icache-coherence", + "wasmtime-slab", + "wasmtime-versioned-export-macros", + "wasmtime-winch", + "wat", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-asm-macros" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a9c42562d879c749288d9a26acc0d95d2ca069e30c2ec2efce84461c4d62b3" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "wasmtime-cache" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d5d5aac98c8ae87cf5244495da7722e3fa022aa6f3f4fcd5e3d6e5699ce422" +dependencies = [ + "anyhow", + "base64", + "directories-next", + "log", + "postcard", + "rustix", + "serde", + "serde_derive", + "sha2", + "toml", + "windows-sys 0.52.0", + "zstd", +] + +[[package]] +name = "wasmtime-component-macro" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0c3f57c4bc96f9b4a6ff4d6cb6e837913eff32e98d09e2b6d79b5c4647b415b" +dependencies = [ + "anyhow", + "proc-macro2", + "quote", + "syn 2.0.77", + "wasmtime-component-util", + "wasmtime-wit-bindgen", + "wit-parser", +] + +[[package]] +name = "wasmtime-component-util" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1da707969bc31a565da9b32d087eb2370c95c6f2087c5539a15f2e3b27e77203" + +[[package]] +name = "wasmtime-cranelift" +version = 
"23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62cb6135ec46994299be711b78b03acaa9480de3715f827d450f0c947a84977c" +dependencies = [ + "anyhow", + "cfg-if 1.0.0", + "cranelift-codegen", + "cranelift-control", + "cranelift-entity", + "cranelift-frontend", + "cranelift-native", + "cranelift-wasm", + "gimli", + "log", + "object", + "target-lexicon", + "thiserror", + "wasmparser", + "wasmtime-environ", + "wasmtime-versioned-export-macros", +] + +[[package]] +name = "wasmtime-environ" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bcaa3b42a0718e9123da7fb75e8e13fc95df7db2a7e32e2f2f4f0d3333b7d6f" +dependencies = [ + "anyhow", + "cpp_demangle", + "cranelift-bitset", + "cranelift-entity", + "gimli", + "indexmap", + "log", + "object", + "postcard", + "rustc-demangle", + "semver", + "serde", + "serde_derive", + "target-lexicon", + "wasm-encoder 0.212.0", + "wasmparser", + "wasmprinter", + "wasmtime-component-util", + "wasmtime-types", +] + +[[package]] +name = "wasmtime-fiber" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1c805515f4bc157f70f998038951009d21a19c1ef8c5fbb374a11b1d56672" +dependencies = [ + "anyhow", + "cc", + "cfg-if 1.0.0", + "rustix", + "wasmtime-asm-macros", + "wasmtime-versioned-export-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-jit-debug" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "118e141e52f3898a531a612985bd09a5e05a1d646cad2f30a3020b675c21cd49" +dependencies = [ + "object", + "once_cell", + "rustix", + "wasmtime-versioned-export-macros", +] + +[[package]] +name = "wasmtime-jit-icache-coherence" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cfee42dac5148fc2664ab1f5cb8d7fa77a28d1a2cf1d9483abc2c3d751a58b9" +dependencies = [ + "anyhow", + "cfg-if 1.0.0", + "libc", + "windows-sys 
0.52.0", +] + +[[package]] +name = "wasmtime-slab" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42eb8f6515708ec67974998c3e644101db4186308985f5ef7c2ef324ff33c948" + +[[package]] +name = "wasmtime-types" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046873fb8fb3e9652f3fd76fe99c8c8129007695c3d73b2e307fdae40f6e324c" +dependencies = [ + "anyhow", + "cranelift-entity", + "serde", + "serde_derive", + "smallvec", + "wasmparser", +] + +[[package]] +name = "wasmtime-versioned-export-macros" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99c02af2e9dbeb427304d1a08787d70ed0dbfec1af2236616f84c9f1f03e7969" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "wasmtime-winch" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ceddc47a49af10908a288fdfdc296ab3932062cab62a785e3705bbb3709c59" +dependencies = [ + "anyhow", + "cranelift-codegen", + "gimli", + "object", + "target-lexicon", + "wasmparser", + "wasmtime-cranelift", + "wasmtime-environ", + "winch-codegen", +] + +[[package]] +name = "wasmtime-wit-bindgen" +version = "23.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f528f8b8a2376a3dacaf497d960216dd466d324425361e1e00e26de0a7705c" +dependencies = [ + "anyhow", + "heck 0.4.1", + "indexmap", + "wit-parser", +] + +[[package]] +name = "wast" +version = "217.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79004ecebded92d3c710d4841383368c7f04b63d0992ddd6b0c7d5029b7629b7" +dependencies = [ + "bumpalo", + "leb128", + "memchr", + "unicode-width", + "wasm-encoder 0.217.0", +] + +[[package]] +name = "wat" +version = "1.217.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c126271c3d92ca0f7c63e4e462e40c69cca52fd4245fcda730d1cf558fb55088" 
+dependencies = [ + "wast", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "winch-codegen" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a41b67a37ea74e83c38ef495cc213aba73385236b1deee883dc869e835003b9" +dependencies = [ + "anyhow", + "cranelift-codegen", + "gimli", + "regalloc2", + "smallvec", + "target-lexicon", + "wasmparser", + "wasmtime-cranelift", + "wasmtime-environ", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + 
+[[package]] +name = "winnow" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-parser" +version = "0.212.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceeb0424aa8679f3fcf2d6e3cfa381f3d6fa6179976a2c05a6249dd2bb426716" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/crates/prompt_gateway/Cargo.toml b/crates/prompt_gateway/Cargo.toml new file mode 100644 index 00000000..29d385b7 --- /dev/null +++ 
b/crates/prompt_gateway/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "prompt_gateway" +version = "0.1.0" +authors = ["Katanemo Inc "] +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +proxy-wasm = "0.2.1" +log = "0.4" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9.34" +serde_json = "1.0" +md5 = "0.7.0" +common = { path = "../common" } +http = "1.1.0" +governor = { version = "0.6.3", default-features = false, features = ["no_std"]} +acap = "0.3.0" +rand = "0.8.5" +thiserror = "1.0.64" +derivative = "2.2.0" +sha2 = "0.10.8" + +[dev-dependencies] +proxy-wasm-test-framework = { git = "https://github.com/katanemo/test-framework.git", branch = "new" } +serial_test = "3.1.1" diff --git a/crates/prompt_gateway/src/filter_context.rs b/crates/prompt_gateway/src/filter_context.rs new file mode 100644 index 00000000..5d0090a7 --- /dev/null +++ b/crates/prompt_gateway/src/filter_context.rs @@ -0,0 +1,322 @@ +use crate::stream_context::StreamContext; +use common::common_types::EmbeddingType; +use common::configuration::{Configuration, GatewayMode, Overrides, PromptGuards, PromptTarget}; +use common::consts::ARCH_INTERNAL_CLUSTER_NAME; +use common::consts::ARCH_UPSTREAM_HOST_HEADER; +use common::consts::DEFAULT_EMBEDDING_MODEL; +use common::consts::MODEL_SERVER_NAME; +use common::embeddings::{ + CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, +}; +use common::http::CallArgs; +use common::http::Client; +use common::llm_providers::LlmProviders; +use common::ratelimit; +use common::stats::Counter; +use common::stats::Gauge; +use common::stats::IncrementingMetric; +use log::debug; +use proxy_wasm::traits::*; +use proxy_wasm::types::*; +use std::cell::RefCell; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::rc::Rc; +use std::time::Duration; + +#[derive(Copy, Clone, Debug)] +pub struct WasmMetrics { + pub active_http_calls: Gauge, + pub ratelimited_rq: Counter, +} + +impl 
WasmMetrics { + fn new() -> WasmMetrics { + WasmMetrics { + active_http_calls: Gauge::new(String::from("active_http_calls")), + ratelimited_rq: Counter::new(String::from("ratelimited_rq")), + } + } +} + +pub type EmbeddingTypeMap = HashMap>; +pub type EmbeddingsStore = HashMap; + +#[derive(Debug)] +pub struct FilterCallContext { + pub prompt_target_name: String, + pub embedding_type: EmbeddingType, +} + +#[derive(Debug)] +pub struct FilterContext { + metrics: Rc, + // callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request. + callouts: RefCell>, + overrides: Rc>, + system_prompt: Rc>, + prompt_targets: Rc>, + mode: GatewayMode, + prompt_guards: Rc, + llm_providers: Option>, + embeddings_store: Option>, + temp_embeddings_store: EmbeddingsStore, +} + +impl FilterContext { + pub fn new() -> FilterContext { + FilterContext { + callouts: RefCell::new(HashMap::new()), + metrics: Rc::new(WasmMetrics::new()), + system_prompt: Rc::new(None), + prompt_targets: Rc::new(HashMap::new()), + overrides: Rc::new(None), + prompt_guards: Rc::new(PromptGuards::default()), + mode: GatewayMode::Prompt, + llm_providers: None, + embeddings_store: Some(Rc::new(HashMap::new())), + temp_embeddings_store: HashMap::new(), + } + } + + fn process_prompt_targets(&self) { + for values in self.prompt_targets.iter() { + let prompt_target = values.1; + self.schedule_embeddings_call( + &prompt_target.name, + &prompt_target.description, + EmbeddingType::Description, + ); + } + } + + fn schedule_embeddings_call( + &self, + prompt_target_name: &str, + input: &str, + embedding_type: EmbeddingType, + ) { + let embeddings_input = CreateEmbeddingRequest { + input: Box::new(CreateEmbeddingRequestInput::String(String::from(input))), + model: String::from(DEFAULT_EMBEDDING_MODEL), + encoding_format: None, + dimensions: None, + user: None, + }; + let json_data = serde_json::to_string(&embeddings_input).unwrap(); + + let call_args = CallArgs::new( 
+ ARCH_INTERNAL_CLUSTER_NAME, + "/embeddings", + vec![ + (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), + (":method", "POST"), + (":path", "/embeddings"), + (":authority", MODEL_SERVER_NAME), + ("content-type", "application/json"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ], + Some(json_data.as_bytes()), + vec![], + Duration::from_secs(60), + ); + + let call_context = crate::filter_context::FilterCallContext { + prompt_target_name: String::from(prompt_target_name), + embedding_type, + }; + + if let Err(error) = self.http_call(call_args, call_context) { + panic!("{error}") + } + } + + fn embedding_response_handler( + &mut self, + body_size: usize, + embedding_type: EmbeddingType, + prompt_target_name: String, + ) { + let prompt_target = self + .prompt_targets + .get(&prompt_target_name) + .unwrap_or_else(|| { + panic!( + "Received embeddings response for unknown prompt target name={}", + prompt_target_name + ) + }); + + let body = self + .get_http_call_response_body(0, body_size) + .expect("No body in response"); + if !body.is_empty() { + let mut embedding_response: CreateEmbeddingResponse = + match serde_json::from_slice(&body) { + Ok(response) => response, + Err(e) => { + panic!( + "Error deserializing embedding response. 
body: {:?}: {:?}", + String::from_utf8(body).unwrap(), + e + ); + } + }; + + let embeddings = embedding_response.data.remove(0).embedding; + debug!( + "Adding embeddings for prompt target name: {:?}, description: {:?}, embedding type: {:?}", + prompt_target.name, + prompt_target.description, + embedding_type + ); + + let entry = self.temp_embeddings_store.entry(prompt_target_name); + match entry { + Entry::Occupied(_) => { + entry.and_modify(|e| { + if let Entry::Vacant(e) = e.entry(embedding_type) { + e.insert(embeddings); + } else { + panic!( + "Duplicate {:?} for prompt target with name=\"{}\"", + &embedding_type, prompt_target.name + ) + } + }); + } + Entry::Vacant(_) => { + entry.or_insert(HashMap::from([(embedding_type, embeddings)])); + } + } + + if self.prompt_targets.len() == self.temp_embeddings_store.len() { + self.embeddings_store = + Some(Rc::new(std::mem::take(&mut self.temp_embeddings_store))) + } + } + } +} + +impl Client for FilterContext { + type CallContext = FilterCallContext; + + fn callouts(&self) -> &RefCell> { + &self.callouts + } + + fn active_http_calls(&self) -> &Gauge { + &self.metrics.active_http_calls + } +} + +impl Context for FilterContext { + fn on_http_call_response( + &mut self, + token_id: u32, + _num_headers: usize, + body_size: usize, + _num_trailers: usize, + ) { + debug!( + "filter_context: on_http_call_response called with token_id: {:?}", + token_id + ); + let callout_data = self + .callouts + .borrow_mut() + .remove(&token_id) + .expect("invalid token_id"); + + self.metrics.active_http_calls.increment(-1); + + self.embedding_response_handler( + body_size, + callout_data.embedding_type, + callout_data.prompt_target_name, + ) + } +} + +// RootContext allows the Rust code to reach into the Envoy Config +impl RootContext for FilterContext { + fn on_configure(&mut self, _: usize) -> bool { + let config_bytes = self + .get_plugin_configuration() + .expect("Arch config cannot be empty"); + + let config: Configuration = match 
serde_yaml::from_slice(&config_bytes) { + Ok(config) => config, + Err(err) => panic!("Invalid arch config \"{:?}\"", err), + }; + + self.overrides = Rc::new(config.overrides); + + let mut prompt_targets = HashMap::new(); + for pt in config.prompt_targets { + prompt_targets.insert(pt.name.clone(), pt.clone()); + } + self.system_prompt = Rc::new(config.system_prompt); + self.prompt_targets = Rc::new(prompt_targets); + self.mode = config.mode.unwrap_or_default(); + + ratelimit::ratelimits(Some(config.ratelimits.unwrap_or_default())); + + if let Some(prompt_guards) = config.prompt_guards { + self.prompt_guards = Rc::new(prompt_guards) + } + + match config.llm_providers.try_into() { + Ok(llm_providers) => self.llm_providers = Some(Rc::new(llm_providers)), + Err(err) => panic!("{err}"), + } + + true + } + + fn create_http_context(&self, context_id: u32) -> Option> { + debug!( + "||| create_http_context called with context_id: {:?} |||", + context_id + ); + + // No StreamContext can be created until the Embedding Store is fully initialized. 
+ let embedding_store = match self.mode { + GatewayMode::Llm => None, + GatewayMode::Prompt => Some(Rc::clone(self.embeddings_store.as_ref().unwrap())), + }; + Some(Box::new(StreamContext::new( + context_id, + Rc::clone(&self.metrics), + Rc::clone(&self.system_prompt), + Rc::clone(&self.prompt_targets), + Rc::clone(&self.prompt_guards), + Rc::clone(&self.overrides), + Rc::clone( + self.llm_providers + .as_ref() + .expect("LLM Providers must exist when Streams are being created"), + ), + embedding_store, + self.mode.clone(), + ))) + } + + fn get_type(&self) -> Option { + Some(ContextType::HttpContext) + } + + fn on_vm_start(&mut self, _: usize) -> bool { + self.set_tick_period(Duration::from_secs(1)); + true + } + + fn on_tick(&mut self) { + debug!("starting up arch filter in mode: {:?}", self.mode); + if self.mode == GatewayMode::Prompt { + self.process_prompt_targets(); + } + + self.set_tick_period(Duration::from_secs(0)); + } +} diff --git a/crates/prompt_gateway/src/lib.rs b/crates/prompt_gateway/src/lib.rs new file mode 100644 index 00000000..e2ad9025 --- /dev/null +++ b/crates/prompt_gateway/src/lib.rs @@ -0,0 +1,13 @@ +use filter_context::FilterContext; +use proxy_wasm::traits::*; +use proxy_wasm::types::*; + +mod filter_context; +mod stream_context; + +proxy_wasm::main! 
{{ + proxy_wasm::set_log_level(LogLevel::Trace); + proxy_wasm::set_root_context(|_| -> Box { + Box::new(FilterContext::new()) + }); +}} diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs new file mode 100644 index 00000000..5e4e6149 --- /dev/null +++ b/crates/prompt_gateway/src/stream_context.rs @@ -0,0 +1,1576 @@ +use crate::filter_context::{EmbeddingsStore, WasmMetrics}; +use acap::cos; +use common::common_types::open_ai::{ + ArchState, ChatCompletionChunkResponse, ChatCompletionTool, ChatCompletionsRequest, + ChatCompletionsResponse, Choice, FunctionDefinition, FunctionParameter, FunctionParameters, + Message, ParameterType, StreamOptions, ToolCall, ToolCallState, ToolType, +}; +use common::common_types::{ + EmbeddingType, HallucinationClassificationRequest, HallucinationClassificationResponse, + PromptGuardRequest, PromptGuardResponse, PromptGuardTask, ZeroShotClassificationRequest, + ZeroShotClassificationResponse, +}; +use common::configuration::{GatewayMode, LlmProvider}; +use common::configuration::{Overrides, PromptGuards, PromptTarget}; +use common::consts::{ + ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME, + ARCH_LLM_UPSTREAM_LISTENER, ARCH_MESSAGES_KEY, ARCH_MODEL_PREFIX, ARCH_PROVIDER_HINT_HEADER, + ARCH_ROUTING_HEADER, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ARC_FC_CLUSTER, + CHAT_COMPLETIONS_PATH, DEFAULT_EMBEDDING_MODEL, DEFAULT_HALLUCINATED_THRESHOLD, + DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, MODEL_SERVER_NAME, + RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, USER_ROLE, +}; +use common::embeddings::{ + CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, +}; +use common::http::{CallArgs, Client, ClientError}; +use common::llm_providers::LlmProviders; +use common::ratelimit::Header; +use common::stats::Gauge; +use common::{ratelimit, routing, tokenizer}; +use http::StatusCode; +use log::{debug, 
info, warn}; +use proxy_wasm::traits::*; +use proxy_wasm::types::*; +use serde_json::Value; +use sha2::{Digest, Sha256}; +use std::cell::RefCell; +use std::collections::HashMap; +use std::num::NonZero; +use std::rc::Rc; +use std::time::Duration; + +use common::stats::IncrementingMetric; + +#[derive(Debug, Clone)] +enum ResponseHandlerType { + GetEmbeddings, + FunctionResolver, + FunctionCall, + ZeroShotIntent, + HallucinationDetect, + ArchGuard, + DefaultTarget, +} + +#[derive(Debug, Clone)] +pub struct StreamCallContext { + response_handler_type: ResponseHandlerType, + user_message: Option, + prompt_target_name: Option, + request_body: ChatCompletionsRequest, + tool_calls: Option>, + similarity_scores: Option>, + upstream_cluster: Option, + upstream_cluster_path: Option, +} + +#[derive(thiserror::Error, Debug)] +pub enum ServerError { + #[error(transparent)] + HttpDispatch(ClientError), + #[error(transparent)] + Deserialization(serde_json::Error), + #[error(transparent)] + Serialization(serde_json::Error), + #[error("{0}")] + LogicError(String), + #[error("upstream error response authority={authority}, path={path}, status={status}")] + Upstream { + authority: String, + path: String, + status: String, + }, + #[error(transparent)] + ExceededRatelimit(ratelimit::Error), + #[error("jailbreak detected: {0}")] + Jailbreak(String), + #[error("{why}")] + BadRequest { why: String }, + #[error("{why}")] + NoMessagesFound { why: String }, +} + +pub struct StreamContext { + context_id: u32, + metrics: Rc, + system_prompt: Rc>, + prompt_targets: Rc>, + embeddings_store: Option>, + overrides: Rc>, + callouts: RefCell>, + tool_calls: Option>, + tool_call_response: Option, + arch_state: Option>, + request_body_size: usize, + ratelimit_selector: Option
, + streaming_response: bool, + user_prompt: Option, + response_tokens: usize, + is_chat_completions_request: bool, + chat_completions_request: Option, + prompt_guards: Rc, + llm_providers: Rc, + llm_provider: Option>, + request_id: Option, + mode: GatewayMode, +} + +impl StreamContext { + #[allow(clippy::too_many_arguments)] + pub fn new( + context_id: u32, + metrics: Rc, + system_prompt: Rc>, + prompt_targets: Rc>, + prompt_guards: Rc, + overrides: Rc>, + llm_providers: Rc, + embeddings_store: Option>, + mode: GatewayMode, + ) -> Self { + StreamContext { + context_id, + metrics, + system_prompt, + prompt_targets, + embeddings_store, + callouts: RefCell::new(HashMap::new()), + chat_completions_request: None, + tool_calls: None, + tool_call_response: None, + arch_state: None, + request_body_size: 0, + ratelimit_selector: None, + streaming_response: false, + user_prompt: None, + response_tokens: 0, + is_chat_completions_request: false, + llm_providers, + llm_provider: None, + prompt_guards, + overrides, + request_id: None, + mode, + } + } + fn llm_provider(&self) -> &LlmProvider { + self.llm_provider + .as_ref() + .expect("the provider should be set when asked for it") + } + + fn embeddings_store(&self) -> &EmbeddingsStore { + self.embeddings_store + .as_ref() + .expect("embeddings store is not set") + } + + fn select_llm_provider(&mut self) { + let provider_hint = self + .get_http_request_header(ARCH_PROVIDER_HINT_HEADER) + .map(|provider_name| provider_name.into()); + + debug!("llm provider hint: {:?}", provider_hint); + self.llm_provider = Some(routing::get_llm_provider( + &self.llm_providers, + provider_hint, + )); + debug!("selected llm: {}", self.llm_provider.as_ref().unwrap().name); + } + + fn add_routing_header(&mut self) { + match self.mode { + GatewayMode::Prompt => { + // in prompt gateway mode, we need to route to llm upstream listener + self.add_http_request_header(ARCH_UPSTREAM_HOST_HEADER, ARCH_LLM_UPSTREAM_LISTENER); + } + _ => { + 
self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name); + } + } + } + + fn modify_auth_headers(&mut self) -> Result<(), ServerError> { + let llm_provider_api_key_value = + self.llm_provider() + .access_key + .as_ref() + .ok_or(ServerError::BadRequest { + why: format!( + "No access key configured for selected LLM Provider \"{}\"", + self.llm_provider() + ), + })?; + + let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value); + + self.set_http_request_header("Authorization", Some(&authorization_header_value)); + + Ok(()) + } + + fn delete_content_length_header(&mut self) { + // Remove the Content-Length header because further body manipulations in the gateway logic will invalidate it. + // Servers generally throw away requests whose body length does not match the Content-Length header. + // However, a missing Content-Length header is not grounds for bad requests given that intermediary hops could + // manipulate the body in benign ways e.g., compression. 
+ self.set_http_request_header("content-length", None); + } + + fn save_ratelimit_header(&mut self) { + self.ratelimit_selector = self + .get_http_request_header(RATELIMIT_SELECTOR_HEADER_KEY) + .and_then(|key| { + self.get_http_request_header(&key) + .map(|value| Header { key, value }) + }); + } + + fn send_server_error(&self, error: ServerError, override_status_code: Option) { + debug!("server error occurred: {}", error); + self.send_http_response( + override_status_code + .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR) + .as_u16() + .into(), + vec![], + Some(format!("{error}").as_bytes()), + ); + } + + fn embeddings_handler(&mut self, body: Vec, mut callout_context: StreamCallContext) { + let embedding_response: CreateEmbeddingResponse = match serde_json::from_slice(&body) { + Ok(embedding_response) => embedding_response, + Err(e) => { + return self.send_server_error(ServerError::Deserialization(e), None); + } + }; + + let prompt_embeddings_vector = &embedding_response.data[0].embedding; + + debug!( + "embedding model: {}, vector length: {:?}", + embedding_response.model, + prompt_embeddings_vector.len() + ); + + let prompt_target_names = self + .prompt_targets + .iter() + // exclude default target + .filter(|(_, prompt_target)| !prompt_target.default.unwrap_or(false)) + .map(|(name, _)| name.clone()) + .collect(); + + let similarity_scores: Vec<(String, f64)> = self + .prompt_targets + .iter() + // exclude default prompt target + .filter(|(_, prompt_target)| !prompt_target.default.unwrap_or(false)) + .map(|(prompt_name, _)| { + let pte = match self.embeddings_store().get(prompt_name) { + Some(embeddings) => embeddings, + None => { + warn!( + "embeddings not found for prompt target name: {}", + prompt_name + ); + return (prompt_name.clone(), f64::NAN); + } + }; + + let description_embeddings = match pte.get(&EmbeddingType::Description) { + Some(embeddings) => embeddings, + None => { + warn!( + "description embeddings not found for prompt target name: {}", + 
prompt_name + ); + return (prompt_name.clone(), f64::NAN); + } + }; + let similarity_score_description = + cos::cosine_similarity(&prompt_embeddings_vector, &description_embeddings); + (prompt_name.clone(), similarity_score_description) + }) + .collect(); + + debug!( + "similarity scores based on description embeddings match: {:?}", + similarity_scores + ); + + callout_context.similarity_scores = Some(similarity_scores); + + let zero_shot_classification_request = ZeroShotClassificationRequest { + // Need to clone into input because user_message is used below. + input: callout_context.user_message.as_ref().unwrap().clone(), + model: String::from(DEFAULT_INTENT_MODEL), + labels: prompt_target_names, + }; + + let json_data: String = match serde_json::to_string(&zero_shot_classification_request) { + Ok(json_data) => json_data, + Err(error) => { + return self.send_server_error(ServerError::Serialization(error), None); + } + }; + + let mut headers = vec![ + (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), + (":method", "POST"), + (":path", "/zeroshot"), + (":authority", MODEL_SERVER_NAME), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + "/zeroshot", + headers, + Some(json_data.as_bytes()), + vec![], + Duration::from_secs(5), + ); + callout_context.response_handler_type = ResponseHandlerType::ZeroShotIntent; + + if let Err(e) = self.http_call(call_args, callout_context) { + self.send_server_error(ServerError::HttpDispatch(e), None); + } + } + + fn hallucination_classification_resp_handler( + &mut self, + body: Vec, + callout_context: StreamCallContext, + ) { + let hallucination_response: HallucinationClassificationResponse = + match serde_json::from_slice(&body) { + Ok(hallucination_response) => hallucination_response, + 
Err(e) => { + return self.send_server_error(ServerError::Deserialization(e), None); + } + }; + let mut keys_with_low_score: Vec = Vec::new(); + for (key, value) in &hallucination_response.params_scores { + if *value < DEFAULT_HALLUCINATED_THRESHOLD { + debug!( + "hallucination detected: score for {} : {} is less than threshold {}", + key, value, DEFAULT_HALLUCINATED_THRESHOLD + ); + keys_with_low_score.push(key.clone().to_string()); + } + } + + if !keys_with_low_score.is_empty() { + let response = + "It seems I’m missing some information. Could you provide the following details: " + .to_string() + + &keys_with_low_score.join(", ") + + " ?"; + let message = Message { + role: SYSTEM_ROLE.to_string(), + content: Some(response), + model: Some(ARCH_FC_MODEL_NAME.to_string()), + tool_calls: None, + }; + + let chat_completion_response = ChatCompletionsResponse { + choices: vec![Choice { + message, + index: 0, + finish_reason: "done".to_string(), + }], + usage: None, + model: ARCH_FC_MODEL_NAME.to_string(), + metadata: None, + }; + + debug!("hallucination response: {:?}", chat_completion_response); + self.send_http_response( + StatusCode::OK.as_u16().into(), + vec![("Powered-By", "Katanemo")], + Some( + serde_json::to_string(&chat_completion_response) + .unwrap() + .as_bytes(), + ), + ); + } else { + // not a hallucination, resume the flow + self.schedule_api_call_request(callout_context); + } + } + + fn zero_shot_intent_detection_resp_handler( + &mut self, + body: Vec, + mut callout_context: StreamCallContext, + ) { + let zeroshot_intent_response: ZeroShotClassificationResponse = + match serde_json::from_slice(&body) { + Ok(zeroshot_response) => zeroshot_response, + Err(e) => { + return self.send_server_error(ServerError::Deserialization(e), None); + } + }; + + debug!("zeroshot intent response: {:?}", zeroshot_intent_response); + + let desc_emb_similarity_map: HashMap = callout_context + .similarity_scores + .clone() + .unwrap() + .into_iter() + .collect(); + + let 
pred_class_desc_emb_similarity = desc_emb_similarity_map + .get(&zeroshot_intent_response.predicted_class) + .unwrap(); + + let prompt_target_similarity_score = zeroshot_intent_response.predicted_class_score * 0.7 + + pred_class_desc_emb_similarity * 0.3; + + debug!( + "similarity score: {:.3}, intent score: {:.3}, description embedding score: {:.3}, prompt: {}", + prompt_target_similarity_score, + zeroshot_intent_response.predicted_class_score, + pred_class_desc_emb_similarity, + callout_context.user_message.as_ref().unwrap() + ); + + let prompt_target_name = zeroshot_intent_response.predicted_class.clone(); + + // Check to see who responded to user message. This will help us identify if control should be passed to Arch FC or not. + // If the last message was from Arch FC, then Arch FC is handling the conversation (possibly for parameter collection). + let mut arch_assistant = false; + let messages = &callout_context.request_body.messages; + if messages.len() >= 2 { + let latest_assistant_message = &messages[messages.len() - 2]; + if let Some(model) = latest_assistant_message.model.as_ref() { + if model.contains(ARCH_MODEL_PREFIX) { + arch_assistant = true; + } + } + } else { + info!("no assistant message found, probably first interaction"); + } + + // get prompt target similarity threshold from overrides + let prompt_target_intent_matching_threshold = match self.overrides.as_ref() { + Some(overrides) => match overrides.prompt_target_intent_matching_threshold { + Some(threshold) => threshold, + None => DEFAULT_PROMPT_TARGET_THRESHOLD, + }, + None => DEFAULT_PROMPT_TARGET_THRESHOLD, + }; + + // check to ensure that the prompt target similarity score is above the threshold + if prompt_target_similarity_score < prompt_target_intent_matching_threshold + || arch_assistant + { + debug!("intent score is low or arch assistant is handling the conversation"); + // if arch fc responded to the user message, then we don't need to check the similarity score + // it may be that 
arch fc is handling the conversation for parameter collection + if arch_assistant { + info!("arch assistant is handling the conversation"); + } else { + debug!("checking for default prompt target"); + if let Some(default_prompt_target) = self + .prompt_targets + .values() + .find(|pt| pt.default.unwrap_or(false)) + { + debug!("default prompt target found"); + let endpoint = default_prompt_target.endpoint.clone().unwrap(); + let upstream_path: String = endpoint.path.unwrap_or(String::from("/")); + + let upstream_endpoint = endpoint.name; + let mut params = HashMap::new(); + params.insert( + ARCH_MESSAGES_KEY.to_string(), + callout_context.request_body.messages.clone(), + ); + let arch_messages_json = serde_json::to_string(¶ms).unwrap(); + debug!("no prompt target found with similarity score above threshold, using default prompt target"); + + let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string(); + + let mut headers = vec![ + (":method", "POST"), + (ARCH_UPSTREAM_HOST_HEADER, &upstream_endpoint), + (":path", &upstream_path), + (":authority", &upstream_endpoint), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + &upstream_path, + headers, + Some(arch_messages_json.as_bytes()), + vec![], + Duration::from_secs(5), + ); + callout_context.response_handler_type = ResponseHandlerType::DefaultTarget; + callout_context.prompt_target_name = Some(default_prompt_target.name.clone()); + + if let Err(e) = self.http_call(call_args, callout_context) { + return self.send_server_error( + ServerError::HttpDispatch(e), + Some(StatusCode::BAD_REQUEST), + ); + } + } + + self.resume_http_request(); + return; + } + } + + let prompt_target = match self.prompt_targets.get(&prompt_target_name) { + Some(prompt_target) => 
prompt_target.clone(), + None => { + return self.send_server_error( + ServerError::LogicError(format!( + "Prompt target not found: {prompt_target_name}" + )), + None, + ); + } + }; + + info!("prompt_target name: {:?}", prompt_target_name); + let mut chat_completion_tools: Vec = Vec::new(); + for pt in self.prompt_targets.values() { + if pt.default.unwrap_or_default() { + continue; + } + // only extract entity names + let properties: HashMap = match pt.parameters { + // Clone is unavoidable here because we don't want to move the values out of the prompt target struct. + Some(ref entities) => { + let mut properties: HashMap = HashMap::new(); + for entity in entities.iter() { + let param = FunctionParameter { + parameter_type: ParameterType::from( + entity.parameter_type.clone().unwrap_or("str".to_string()), + ), + description: entity.description.clone(), + required: entity.required, + enum_values: entity.enum_values.clone(), + default: entity.default.clone(), + }; + properties.insert(entity.name.clone(), param); + } + properties + } + None => HashMap::new(), + }; + let tools_parameters = FunctionParameters { properties }; + + chat_completion_tools.push({ + ChatCompletionTool { + tool_type: ToolType::Function, + function: FunctionDefinition { + name: pt.name.clone(), + description: pt.description.clone(), + parameters: tools_parameters, + }, + } + }); + } + + // archfc handler needs state so it can expand tool calls + let mut metadata = HashMap::new(); + metadata.insert( + ARCH_STATE_HEADER.to_string(), + serde_json::to_string(&self.arch_state).unwrap(), + ); + + let chat_completions = ChatCompletionsRequest { + model: GPT_35_TURBO.to_string(), + messages: callout_context.request_body.messages.clone(), + tools: Some(chat_completion_tools), + stream: false, + stream_options: None, + metadata: Some(metadata), + }; + + let msg_body = match serde_json::to_string(&chat_completions) { + Ok(msg_body) => { + debug!("arch_fc request body content: {}", msg_body); + msg_body + } 
+ Err(e) => { + return self.send_server_error(ServerError::Serialization(e), None); + } + }; + + let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string(); + + let mut headers = vec![ + (":method", "POST"), + (ARCH_UPSTREAM_HOST_HEADER, ARC_FC_CLUSTER), + (":path", "/v1/chat/completions"), + (":authority", ARC_FC_CLUSTER), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + "/v1/chat/completions", + headers, + Some(msg_body.as_bytes()), + vec![], + Duration::from_secs(5), + ); + callout_context.response_handler_type = ResponseHandlerType::FunctionResolver; + callout_context.prompt_target_name = Some(prompt_target.name); + + if let Err(e) = self.http_call(call_args, callout_context) { + self.send_server_error(ServerError::HttpDispatch(e), Some(StatusCode::BAD_REQUEST)); + } + } + + fn function_resolver_handler(&mut self, body: Vec, mut callout_context: StreamCallContext) { + let body_str = String::from_utf8(body).unwrap(); + debug!("arch <= app response body: {}", body_str); + + let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) { + Ok(arch_fc_response) => arch_fc_response, + Err(e) => { + return self.send_server_error(ServerError::Deserialization(e), None); + } + }; + + let model_resp = &arch_fc_response.choices[0]; + + if model_resp.message.tool_calls.is_none() + || model_resp.message.tool_calls.as_ref().unwrap().is_empty() + { + // This means that Arch FC did not have enough information to resolve the function call + // Arch FC probably responded with a message asking for more information. 
+ // Let's send the response back to the user to initialize lightweight dialog for parameter collection + + //TODO: add resolver name to the response so the client can send the response back to the correct resolver + + return self.send_http_response( + StatusCode::OK.as_u16().into(), + vec![("Powered-By", "Katanemo")], + Some(body_str.as_bytes()), + ); + } + + let tool_calls = model_resp.message.tool_calls.as_ref().unwrap(); + + // TODO CO: pass nli check + // If hallucination, pass chat template to check parameters + + // extract all tool names + let tool_names: Vec = tool_calls + .iter() + .map(|tool_call| tool_call.function.name.clone()) + .collect(); + + debug!( + "call context similarity score: {:?}", + callout_context.similarity_scores + ); + //HACK: for now we only support one tool call, we will support multiple tool calls in the future + let mut tool_params = tool_calls[0].function.arguments.clone(); + tool_params.insert( + String::from(ARCH_MESSAGES_KEY), + serde_yaml::to_value(&callout_context.request_body.messages).unwrap(), + ); + + let tools_call_name = tool_calls[0].function.name.clone(); + let tool_params_json_str = serde_json::to_string(&tool_params).unwrap(); + let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone(); + callout_context.tool_calls = Some(tool_calls.clone()); + + debug!( + "prompt_target_name: {}, tool_name(s): {:?}", + prompt_target.name, tool_names + ); + debug!("tool_params: {}", tool_params_json_str); + + if model_resp.message.tool_calls.is_some() + && !model_resp.message.tool_calls.as_ref().unwrap().is_empty() + { + use serde_json::Value; + let v: Value = serde_json::from_str(&tool_params_json_str).unwrap(); + let tool_params_dict: HashMap = match v.as_object() { + Some(obj) => obj + .iter() + .map(|(key, value)| { + // Convert each value to a string, regardless of its type + (key.clone(), value.to_string()) + }) + .collect(), + None => HashMap::new(), // Return an empty HashMap if v is not an object + }; + 
+ let messages = &callout_context.request_body.messages; + let mut arch_assistant = false; + let mut user_messages = Vec::new(); + + if messages.len() >= 2 { + let latest_assistant_message = &messages[messages.len() - 2]; + if let Some(model) = latest_assistant_message.model.as_ref() { + if model.starts_with(ARCH_MODEL_PREFIX) { + arch_assistant = true; + } + } + } + if arch_assistant { + for message in messages.iter() { + if let Some(model) = message.model.as_ref() { + if !model.starts_with(ARCH_MODEL_PREFIX) { + break; + } + } + if message.role == "user" { + if let Some(content) = &message.content { + user_messages.push(content.clone()); + } + } + } + } else if let Some(user_message) = callout_context.user_message.as_ref() { + user_messages.push(user_message.clone()); + } + let user_messages_str = user_messages.join(", "); + debug!("user messages: {}", user_messages_str); + + let hallucination_classification_request = HallucinationClassificationRequest { + prompt: user_messages_str, + model: String::from(DEFAULT_INTENT_MODEL), + parameters: tool_params_dict, + }; + + let json_data: String = + match serde_json::to_string(&hallucination_classification_request) { + Ok(json_data) => json_data, + Err(error) => { + return self.send_server_error(ServerError::Serialization(error), None); + } + }; + + let mut headers = vec![ + (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), + (":method", "POST"), + (":path", "/hallucination"), + (":authority", MODEL_SERVER_NAME), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + "/hallucination", + headers, + Some(json_data.as_bytes()), + vec![], + Duration::from_secs(5), + ); + callout_context.response_handler_type = ResponseHandlerType::HallucinationDetect; + + if let Err(e) = 
self.http_call(call_args, callout_context) { + self.send_server_error(ServerError::HttpDispatch(e), None); + } + } else { + self.schedule_api_call_request(callout_context); + } + } + + fn schedule_api_call_request(&mut self, mut callout_context: StreamCallContext) { + let tools_call_name = callout_context.tool_calls.as_ref().unwrap()[0] + .function + .name + .clone(); + + let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone(); + + //HACK: for now we only support one tool call, we will support multiple tool calls in the future + let mut tool_params = callout_context.tool_calls.as_ref().unwrap()[0] + .function + .arguments + .clone(); + tool_params.insert( + String::from(ARCH_MESSAGES_KEY), + serde_yaml::to_value(&callout_context.request_body.messages).unwrap(), + ); + + let tool_params_json_str = serde_json::to_string(&tool_params).unwrap(); + + let endpoint = prompt_target.endpoint.unwrap(); + let path: String = endpoint.path.unwrap_or(String::from("/")); + + let mut headers = vec![ + (ARCH_UPSTREAM_HOST_HEADER, endpoint.name.as_str()), + (":method", "POST"), + (":path", &path), + (":authority", endpoint.name.as_str()), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + &path, + headers, + Some(tool_params_json_str.as_bytes()), + vec![], + Duration::from_secs(5), + ); + callout_context.upstream_cluster = Some(endpoint.name.clone()); + callout_context.upstream_cluster_path = Some(path.clone()); + callout_context.response_handler_type = ResponseHandlerType::FunctionCall; + + if let Err(e) = self.http_call(call_args, callout_context) { + self.send_server_error(ServerError::HttpDispatch(e), Some(StatusCode::BAD_REQUEST)); + } + } + + fn function_call_response_handler( + &mut self, + body: Vec, + mut callout_context: StreamCallContext, + ) { + if 
let Some(http_status) = self.get_http_call_response_header(":status") { + if http_status != StatusCode::OK.as_str() { + return self.send_server_error( + ServerError::Upstream { + authority: callout_context.upstream_cluster.unwrap(), + path: callout_context.upstream_cluster_path.unwrap(), + status: http_status, + }, + None, + ); + } + } else { + warn!("http status code not found in api response"); + } + let app_function_call_response_str: String = String::from_utf8(body).unwrap(); + self.tool_call_response = Some(app_function_call_response_str.clone()); + debug!( + "arch <= app response body: {}", + app_function_call_response_str + ); + let prompt_target_name = callout_context.prompt_target_name.unwrap(); + let prompt_target = self + .prompt_targets + .get(&prompt_target_name) + .unwrap() + .clone(); + + let mut messages: Vec = Vec::new(); + + // add system prompt + let system_prompt = match prompt_target.system_prompt.as_ref() { + None => self.system_prompt.as_ref().clone(), + Some(system_prompt) => Some(system_prompt.clone()), + }; + if system_prompt.is_some() { + let system_prompt_message = Message { + role: SYSTEM_ROLE.to_string(), + content: system_prompt, + model: None, + tool_calls: None, + }; + messages.push(system_prompt_message); + } + + messages.append(callout_context.request_body.messages.as_mut()); + + let user_message = match messages.pop() { + Some(user_message) => user_message, + None => { + return self.send_server_error( + ServerError::NoMessagesFound { + why: "no user messages found".to_string(), + }, + None, + ); + } + }; + + let final_prompt = format!( + "{}\ncontext: {}", + user_message.content.unwrap(), + app_function_call_response_str + ); + + // add original user prompt + messages.push({ + Message { + role: USER_ROLE.to_string(), + content: Some(final_prompt), + model: None, + tool_calls: None, + } + }); + + let chat_completions_request: ChatCompletionsRequest = ChatCompletionsRequest { + model: callout_context.request_body.model, + messages, 
+ tools: None, + stream: callout_context.request_body.stream, + stream_options: callout_context.request_body.stream_options, + metadata: None, + }; + + let json_string = match serde_json::to_string(&chat_completions_request) { + Ok(json_string) => json_string, + Err(e) => { + return self.send_server_error(ServerError::Serialization(e), None); + } + }; + debug!("arch => openai request body: {}", json_string); + + // Tokenize and Ratelimit. + if let Err(e) = self.enforce_ratelimits(&chat_completions_request.model, &json_string) { + self.send_server_error( + ServerError::ExceededRatelimit(e), + Some(StatusCode::TOO_MANY_REQUESTS), + ); + self.metrics.ratelimited_rq.increment(1); + return; + } + + self.set_http_request_body(0, self.request_body_size, &json_string.into_bytes()); + self.resume_http_request(); + } + + fn enforce_ratelimits( + &mut self, + model: &str, + json_string: &str, + ) -> Result<(), ratelimit::Error> { + if let Some(selector) = self.ratelimit_selector.take() { + // Tokenize and Ratelimit. 
+ if let Ok(token_count) = tokenizer::token_count(model, json_string) { + ratelimit::ratelimits(None).read().unwrap().check_limit( + model.to_owned(), + selector, + NonZero::new(token_count as u32).unwrap(), + )?; + } + } + Ok(()) + } + + fn arch_guard_handler(&mut self, body: Vec, callout_context: StreamCallContext) { + debug!("response received for arch guard"); + let prompt_guard_resp: PromptGuardResponse = serde_json::from_slice(&body).unwrap(); + debug!("prompt_guard_resp: {:?}", prompt_guard_resp); + + if prompt_guard_resp.jailbreak_verdict.unwrap_or_default() { + //TODO: handle other scenarios like forward to error target + let msg = self + .prompt_guards + .jailbreak_on_exception_message() + .unwrap_or("refrain from discussing jailbreaking."); + return self.send_server_error( + ServerError::Jailbreak(String::from(msg)), + Some(StatusCode::BAD_REQUEST), + ); + } + + self.get_embeddings(callout_context); + } + + fn get_embeddings(&mut self, callout_context: StreamCallContext) { + let user_message = callout_context.user_message.unwrap(); + let get_embeddings_input = CreateEmbeddingRequest { + // Need to clone into input because user_message is used below. 
+ input: Box::new(CreateEmbeddingRequestInput::String(user_message.clone())), + model: String::from(DEFAULT_EMBEDDING_MODEL), + encoding_format: None, + dimensions: None, + user: None, + }; + + let json_data: String = match serde_json::to_string(&get_embeddings_input) { + Ok(json_data) => json_data, + Err(error) => { + return self.send_server_error(ServerError::Deserialization(error), None); + } + }; + + let mut headers = vec![ + (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), + (":method", "POST"), + (":path", "/embeddings"), + (":authority", MODEL_SERVER_NAME), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]; + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + "/embeddings", + headers, + Some(json_data.as_bytes()), + vec![], + Duration::from_secs(5), + ); + let call_context = StreamCallContext { + response_handler_type: ResponseHandlerType::GetEmbeddings, + user_message: Some(user_message), + prompt_target_name: None, + request_body: callout_context.request_body, + similarity_scores: None, + upstream_cluster: None, + upstream_cluster_path: None, + tool_calls: None, + }; + + if let Err(e) = self.http_call(call_args, call_context) { + self.send_server_error(ServerError::HttpDispatch(e), None); + } + } + + fn default_target_handler(&self, body: Vec, callout_context: StreamCallContext) { + let prompt_target = self + .prompt_targets + .get(callout_context.prompt_target_name.as_ref().unwrap()) + .unwrap() + .clone(); + debug!( + "response received for default target: {}", + prompt_target.name + ); + // check if the default target should be dispatched to the LLM provider + if !prompt_target.auto_llm_dispatch_on_response.unwrap_or(false) { + let default_target_response_str = String::from_utf8(body).unwrap(); + debug!( + "sending response back to developer: {}", + 
default_target_response_str + ); + self.send_http_response( + StatusCode::OK.as_u16().into(), + vec![("Powered-By", "Katanemo")], + Some(default_target_response_str.as_bytes()), + ); + // self.resume_http_request(); + return; + } + debug!("default_target: sending api response to default llm"); + let chat_completions_resp: ChatCompletionsResponse = match serde_json::from_slice(&body) { + Ok(chat_completions_resp) => chat_completions_resp, + Err(e) => { + return self.send_server_error(ServerError::Deserialization(e), None); + } + }; + let api_resp = chat_completions_resp.choices[0] + .message + .content + .as_ref() + .unwrap(); + let mut messages = callout_context.request_body.messages; + + // add system prompt + match prompt_target.system_prompt.as_ref() { + None => {} + Some(system_prompt) => { + let system_prompt_message = Message { + role: SYSTEM_ROLE.to_string(), + content: Some(system_prompt.clone()), + model: None, + tool_calls: None, + }; + messages.push(system_prompt_message); + } + } + + messages.push(Message { + role: USER_ROLE.to_string(), + content: Some(api_resp.clone()), + model: None, + tool_calls: None, + }); + let chat_completion_request = ChatCompletionsRequest { + model: GPT_35_TURBO.to_string(), + messages, + tools: None, + stream: callout_context.request_body.stream, + stream_options: callout_context.request_body.stream_options, + metadata: None, + }; + let json_resp = serde_json::to_string(&chat_completion_request).unwrap(); + debug!("sending response back to default llm: {}", json_resp); + self.set_http_request_body(0, self.request_body_size, json_resp.as_bytes()); + self.resume_http_request(); + } +} + +// HttpContext is the trait that allows the Rust code to interact with HTTP objects. +impl HttpContext for StreamContext { + // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto + // the lifecycle of the http request and response. 
+ fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { + self.select_llm_provider(); + self.add_routing_header(); + if let Err(error) = self.modify_auth_headers() { + self.send_server_error(error, Some(StatusCode::BAD_REQUEST)); + } + self.delete_content_length_header(); + self.save_ratelimit_header(); + + self.is_chat_completions_request = + self.get_http_request_header(":path").unwrap_or_default() == CHAT_COMPLETIONS_PATH; + + debug!( + "S[{}] req_headers={:?}", + self.context_id, + self.get_http_request_headers() + ); + + self.request_id = self.get_http_request_header(REQUEST_ID_HEADER); + + Action::Continue + } + + fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { + // Let the client send the gateway all the data before sending to the LLM_provider. + // TODO: consider a streaming API. + if !end_of_stream { + return Action::Pause; + } + + if body_size == 0 { + return Action::Continue; + } + + self.request_body_size = body_size; + + // Deserialize body into spec. + // Currently OpenAI API. 
+ let mut deserialized_body: ChatCompletionsRequest = + match self.get_http_request_body(0, body_size) { + Some(body_bytes) => match serde_json::from_slice(&body_bytes) { + Ok(deserialized) => deserialized, + Err(e) => { + self.send_server_error( + ServerError::Deserialization(e), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }, + None => { + self.send_server_error( + ServerError::LogicError(format!( + "Failed to obtain body bytes even though body_size is {}", + body_size + )), + None, + ); + return Action::Pause; + } + }; + self.is_chat_completions_request = true; + + if self.mode == GatewayMode::Llm { + debug!("llm gateway mode, skipping over all prompt targets"); + + // remove metadata from the request body + deserialized_body.metadata = None; + // delete model key from message array + for message in deserialized_body.messages.iter_mut() { + message.model = None; + } + deserialized_body + .model + .clone_from(&self.llm_provider.as_ref().unwrap().model); + let chat_completion_request_str = serde_json::to_string(&deserialized_body).unwrap(); + + // enforce ratelimits + if let Err(e) = + self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str) + { + self.send_server_error( + ServerError::ExceededRatelimit(e), + Some(StatusCode::TOO_MANY_REQUESTS), + ); + self.metrics.ratelimited_rq.increment(1); + return Action::Continue; + } + + debug!( + "arch => {:?}, body: {}", + deserialized_body.model, chat_completion_request_str + ); + self.set_http_request_body(0, body_size, chat_completion_request_str.as_bytes()); + return Action::Continue; + } + + self.arch_state = match deserialized_body.metadata { + Some(ref metadata) => { + if metadata.contains_key(ARCH_STATE_HEADER) { + let arch_state_str = metadata[ARCH_STATE_HEADER].clone(); + let arch_state: Vec = serde_json::from_str(&arch_state_str).unwrap(); + Some(arch_state) + } else { + None + } + } + None => None, + }; + + // Set the model based on the chosen LLM Provider + 
deserialized_body.model = String::from(&self.llm_provider().model); + + self.streaming_response = deserialized_body.stream; + if deserialized_body.stream && deserialized_body.stream_options.is_none() { + deserialized_body.stream_options = Some(StreamOptions { + include_usage: true, + }); + } + + let last_user_prompt = match deserialized_body + .messages + .iter() + .filter(|msg| msg.role == USER_ROLE) + .last() + { + Some(content) => content, + None => { + warn!("No messages in the request body"); + return Action::Continue; + } + }; + + self.user_prompt = Some(last_user_prompt.clone()); + + let user_message_str = self.user_prompt.as_ref().unwrap().content.clone(); + + let prompt_guard_jailbreak_task = self + .prompt_guards + .input_guards + .contains_key(&common::configuration::GuardType::Jailbreak); + + self.chat_completions_request = Some(deserialized_body); + + if !prompt_guard_jailbreak_task { + debug!("Missing input guard. Making inline call to retrieve"); + let callout_context = StreamCallContext { + response_handler_type: ResponseHandlerType::ArchGuard, + user_message: user_message_str.clone(), + prompt_target_name: None, + request_body: self.chat_completions_request.as_ref().unwrap().clone(), + similarity_scores: None, + upstream_cluster: None, + upstream_cluster_path: None, + tool_calls: None, + }; + self.get_embeddings(callout_context); + return Action::Pause; + } + + let get_prompt_guards_request = PromptGuardRequest { + input: self + .user_prompt + .as_ref() + .unwrap() + .content + .as_ref() + .unwrap() + .clone(), + task: PromptGuardTask::Jailbreak, + }; + + let json_data: String = match serde_json::to_string(&get_prompt_guards_request) { + Ok(json_data) => json_data, + Err(error) => { + self.send_server_error(ServerError::Serialization(error), None); + return Action::Pause; + } + }; + + let mut headers = vec![ + (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), + (":method", "POST"), + (":path", "/guard"), + (":authority", MODEL_SERVER_NAME), + 
("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]; + + if self.request_id.is_some() { + headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); + } + + let call_args = CallArgs::new( + ARCH_INTERNAL_CLUSTER_NAME, + "/guard", + headers, + Some(json_data.as_bytes()), + vec![], + Duration::from_secs(5), + ); + let call_context = StreamCallContext { + response_handler_type: ResponseHandlerType::ArchGuard, + user_message: self.user_prompt.as_ref().unwrap().content.clone(), + prompt_target_name: None, + request_body: self.chat_completions_request.as_ref().unwrap().clone(), + similarity_scores: None, + upstream_cluster: None, + upstream_cluster_path: None, + tool_calls: None, + }; + + if let Err(e) = self.http_call(call_args, call_context) { + self.send_server_error(ServerError::HttpDispatch(e), None); + } + + Action::Pause + } + + fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { + debug!( + "recv [S={}] bytes={} end_stream={}", + self.context_id, body_size, end_of_stream + ); + + if !self.is_chat_completions_request { + if let Some(body_str) = self + .get_http_response_body(0, body_size) + .and_then(|bytes| String::from_utf8(bytes).ok()) + { + debug!("recv [S={}] body_str={}", self.context_id, body_str); + } + return Action::Continue; + } + + if !end_of_stream { + return Action::Pause; + } + + let body = self + .get_http_response_body(0, body_size) + .expect("cant get response body"); + + if self.streaming_response { + let body_str = String::from_utf8(body).expect("body is not utf-8"); + debug!("streaming response"); + let chat_completions_data = match body_str.split_once("data: ") { + Some((_, chat_completions_data)) => chat_completions_data, + None => { + self.send_server_error( + ServerError::LogicError(String::from("parsing error in streaming data")), + None, + ); + return Action::Pause; + } + }; + + let chat_completions_chunk_response: 
ChatCompletionChunkResponse = + match serde_json::from_str(chat_completions_data) { + Ok(de) => de, + Err(_) => { + if chat_completions_data != "[NONE]" { + self.send_server_error( + ServerError::LogicError(String::from( + "error in streaming response", + )), + None, + ); + return Action::Continue; + } + return Action::Continue; + } + }; + + if let Some(content) = chat_completions_chunk_response + .choices + .first() + .unwrap() + .delta + .content + .as_ref() + { + let model = &chat_completions_chunk_response.model; + let token_count = tokenizer::token_count(model, content).unwrap_or(0); + self.response_tokens += token_count; + } + } else { + debug!("non streaming response"); + let chat_completions_response: ChatCompletionsResponse = + match serde_json::from_slice(&body) { + Ok(de) => de, + Err(e) => { + debug!("invalid response: {}", String::from_utf8_lossy(&body)); + self.send_server_error(ServerError::Deserialization(e), None); + return Action::Pause; + } + }; + + if chat_completions_response.usage.is_some() { + self.response_tokens += chat_completions_response + .usage + .as_ref() + .unwrap() + .completion_tokens; + } + + if let Some(tool_calls) = self.tool_calls.as_ref() { + if !tool_calls.is_empty() { + if self.arch_state.is_none() { + self.arch_state = Some(Vec::new()); + } + + // compute sha hash from message history + let mut hasher = Sha256::new(); + let prompts: Vec = self + .chat_completions_request + .as_ref() + .unwrap() + .messages + .iter() + .filter(|msg| msg.role == USER_ROLE) + .map(|msg| msg.content.clone().unwrap()) + .collect(); + let prompts_merged = prompts.join("#.#"); + hasher.update(prompts_merged.clone()); + let hash_key = hasher.finalize(); + // conver hash to hex string + let hash_key_str = format!("{:x}", hash_key); + debug!("hash key: {}, prompts: {}", hash_key_str, prompts_merged); + + // create new tool call state + let tool_call_state = ToolCallState { + key: hash_key_str, + message: self.user_prompt.clone(), + tool_call: 
tool_calls[0].function.clone(), + tool_response: self.tool_call_response.clone().unwrap(), + }; + + // push tool call state to arch state + self.arch_state + .as_mut() + .unwrap() + .push(ArchState::ToolCall(vec![tool_call_state])); + + let mut data: Value = serde_json::from_slice(&body).unwrap(); + // use serde::Value to manipulate the json object and ensure that we don't lose any data + if let Value::Object(ref mut map) = data { + // serialize arch state and add to metadata + let arch_state_str = serde_json::to_string(&self.arch_state).unwrap(); + debug!("arch_state: {}", arch_state_str); + let metadata = map + .entry("metadata") + .or_insert(Value::Object(serde_json::Map::new())); + metadata.as_object_mut().unwrap().insert( + ARCH_STATE_HEADER.to_string(), + serde_json::Value::String(arch_state_str), + ); + + let data_serialized = serde_json::to_string(&data).unwrap(); + debug!("arch => user: {}", data_serialized); + self.set_http_response_body(0, body_size, data_serialized.as_bytes()); + }; + } + } + } + + debug!( + "recv [S={}] total_tokens={} end_stream={}", + self.context_id, self.response_tokens, end_of_stream + ); + + // TODO:: ratelimit based on response tokens. 
+ Action::Continue + } +} + +impl Context for StreamContext { + fn on_http_call_response( + &mut self, + token_id: u32, + _num_headers: usize, + body_size: usize, + _num_trailers: usize, + ) { + let callout_context = self + .callouts + .get_mut() + .remove(&token_id) + .expect("invalid token_id"); + self.metrics.active_http_calls.increment(-1); + + if let Some(body) = self.get_http_call_response_body(0, body_size) { + match callout_context.response_handler_type { + ResponseHandlerType::GetEmbeddings => { + self.embeddings_handler(body, callout_context) + } + ResponseHandlerType::ZeroShotIntent => { + self.zero_shot_intent_detection_resp_handler(body, callout_context) + } + ResponseHandlerType::HallucinationDetect => { + self.hallucination_classification_resp_handler(body, callout_context) + } + ResponseHandlerType::FunctionResolver => { + self.function_resolver_handler(body, callout_context) + } + ResponseHandlerType::FunctionCall => { + self.function_call_response_handler(body, callout_context) + } + ResponseHandlerType::ArchGuard => self.arch_guard_handler(body, callout_context), + ResponseHandlerType::DefaultTarget => { + self.default_target_handler(body, callout_context) + } + } + } else { + self.send_server_error( + ServerError::LogicError(String::from("No response body in inline HTTP request")), + None, + ); + } + } +} + +impl Client for StreamContext { + type CallContext = StreamCallContext; + + fn callouts(&self) -> &RefCell> { + &self.callouts + } + + fn active_http_calls(&self) -> &Gauge { + &self.metrics.active_http_calls + } +} diff --git a/crates/prompt_gateway/tests/integration.rs b/crates/prompt_gateway/tests/integration.rs new file mode 100644 index 00000000..2e9e984e --- /dev/null +++ b/crates/prompt_gateway/tests/integration.rs @@ -0,0 +1,805 @@ +use common::common_types::open_ai::{ChatCompletionsResponse, Choice, Message, Usage}; +use common::common_types::open_ai::{FunctionCallDetail, ToolCall, ToolType}; +use 
common::common_types::{HallucinationClassificationResponse, PromptGuardResponse}; +use common::embeddings::{ + create_embedding_response, embedding, CreateEmbeddingResponse, CreateEmbeddingResponseUsage, + Embedding, +}; +use common::{common_types::ZeroShotClassificationResponse, configuration::Configuration}; +use http::StatusCode; +use proxy_wasm_test_framework::tester::{self, Tester}; +use proxy_wasm_test_framework::types::{ + Action, BufferType, LogLevel, MapType, MetricType, ReturnType, +}; +use serde_yaml::Value; +use serial_test::serial; +use std::collections::HashMap; +use std::path::Path; + +fn wasm_module() -> String { + let wasm_file = Path::new("target/wasm32-wasi/release/prompt_gateway.wasm"); + assert!( + wasm_file.exists(), + "Run `cargo build --release --target=wasm32-wasi` first" + ); + wasm_file.to_str().unwrap().to_string() +} + +fn request_headers_expectations(module: &mut Tester, http_context: i32) { + module + .call_proxy_on_request_headers(http_context, 0, false) + .expect_get_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-arch-llm-provider-hint"), + ) + .returning(Some("default")) + .expect_log(Some(LogLevel::Debug), None) + .expect_add_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-arch-upstream"), + Some("arch_llm_listener"), + ) + .expect_add_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-arch-llm-provider"), + Some("open-ai-gpt-4"), + ) + .expect_replace_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("Authorization"), + Some("Bearer secret_key"), + ) + .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length")) + .expect_get_header_map_value( + Some(MapType::HttpRequestHeaders), + Some("x-arch-ratelimit-selector"), + ) + .returning(Some("selector-key")) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("selector-key")) + .returning(Some("selector-value")) + 
.expect_get_header_map_pairs(Some(MapType::HttpRequestHeaders)) + .returning(None) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path")) + .returning(Some("/v1/chat/completions")) + .expect_log(Some(LogLevel::Debug), None) + .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id")) + .returning(None) + .execute_and_expect(ReturnType::Action(Action::Continue)) + .unwrap(); +} + +fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { + module + .call_proxy_on_context_create(http_context, filter_context) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + request_headers_expectations(module, http_context); + + // Request Body + let chat_completions_request_body = "\ +{\ + \"messages\": [\ + {\ + \"role\": \"system\",\ + \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\ + },\ + {\ + \"role\": \"user\",\ + \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ + }\ + ],\ + \"model\": \"gpt-4\"\ +}"; + + module + .call_proxy_on_request_body( + http_context, + chat_completions_request_body.len() as i32, + true, + ) + .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) + .returning(Some(chat_completions_request_body)) + // The actual call is not important in this test, we just need to grab the token_id + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + (":path", "/guard"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(1)) + .expect_log(Some(LogLevel::Debug), None) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::Action(Action::Pause)) + .unwrap(); + + let prompt_guard_response = 
PromptGuardResponse { + toxic_prob: None, + toxic_verdict: None, + jailbreak_prob: None, + jailbreak_verdict: None, + }; + let prompt_guard_response_buffer = serde_json::to_string(&prompt_guard_response).unwrap(); + module + .call_proxy_on_http_call_response( + http_context, + 1, + 0, + prompt_guard_response_buffer.len() as i32, + 0, + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&prompt_guard_response_buffer)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + (":path", "/embeddings"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(2)) + .expect_metric_increment("active_http_calls", 1) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let embedding_response = CreateEmbeddingResponse { + data: vec![Embedding { + index: 0, + embedding: vec![], + object: embedding::Object::default(), + }], + model: String::from("test"), + object: create_embedding_response::Object::default(), + usage: Box::new(CreateEmbeddingResponseUsage::new(0, 0)), + }; + let embeddings_response_buffer = serde_json::to_string(&embedding_response).unwrap(); + module + .call_proxy_on_http_call_response( + http_context, + 2, + 0, + embeddings_response_buffer.len() as i32, + 0, + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&embeddings_response_buffer)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + 
(":path", "/zeroshot"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(3)) + .expect_metric_increment("active_http_calls", 1) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let zero_shot_response = ZeroShotClassificationResponse { + predicted_class: "weather_forecast".to_string(), + predicted_class_score: 0.1, + scores: HashMap::new(), + model: "test-model".to_string(), + }; + let zeroshot_intent_detection_buffer = serde_json::to_string(&zero_shot_response).unwrap(); + module + .call_proxy_on_http_call_response( + http_context, + 3, + 0, + zeroshot_intent_detection_buffer.len() as i32, + 0, + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&zeroshot_intent_detection_buffer)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + (":method", "POST"), + ("x-arch-upstream", "arch_fc"), + (":path", "/v1/chat/completions"), + (":authority", "arch_fc"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "120000"), + ]), + None, + None, + None, + ) + .returning(Some(4)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); +} + +fn setup_filter(module: &mut Tester, config: &str) -> i32 { + let filter_context = 1; + + module + .call_proxy_on_context_create(filter_context, 0) + .expect_metric_creation(MetricType::Gauge, "active_http_calls") + .expect_metric_creation(MetricType::Counter, "ratelimited_rq") + .execute_and_expect(ReturnType::None) + .unwrap(); 
+ + module + .call_proxy_on_configure(filter_context, config.len() as i32) + .expect_get_buffer_bytes(Some(BufferType::PluginConfiguration)) + .returning(Some(config)) + .execute_and_expect(ReturnType::Bool(true)) + .unwrap(); + + module + .call_proxy_on_tick(filter_context) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + (":path", "/embeddings"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(101)) + .expect_metric_increment("active_http_calls", 1) + .expect_set_tick_period_millis(Some(0)) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let embedding_response = CreateEmbeddingResponse { + data: vec![Embedding { + embedding: vec![], + index: 0, + object: embedding::Object::default(), + }], + model: String::from("test"), + object: create_embedding_response::Object::default(), + usage: Box::new(CreateEmbeddingResponseUsage { + prompt_tokens: 0, + total_tokens: 0, + }), + }; + let embedding_response_str = serde_json::to_string(&embedding_response).unwrap(); + module + .call_proxy_on_http_call_response( + filter_context, + 101, + 0, + embedding_response_str.len() as i32, + 0, + ) + .expect_log( + Some(LogLevel::Debug), + Some( + format!( + "filter_context: on_http_call_response called with token_id: {:?}", + 101 + ) + .as_str(), + ), + ) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&embedding_response_str)) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + filter_context +} + +fn default_config() -> &'static str { + r#" +version: "0.1-beta" + +listener: + address: 0.0.0.0 + port: 10000 + message_format: huggingface + connect_timeout: 
0.005s + +endpoints: + api_server: + endpoint: api_server:80 + connect_timeout: 0.005s + +llm_providers: + - name: open-ai-gpt-4 + provider: openai + access_key: secret_key + model: gpt-4 + default: true + +overrides: + # confidence threshold for prompt target intent matching + prompt_target_intent_matching_threshold: 0.6 + +system_prompt: | + You are a helpful assistant. + +prompt_guards: + input_guards: + jailbreak: + on_exception: + message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters." + +prompt_targets: + - name: weather_forecast + description: This function provides realtime weather forecast information for a given city. + parameters: + - name: city + required: true + description: The city for which the weather forecast is requested. + - name: days + description: The number of days for which the weather forecast is requested. + - name: units + description: The units in which the weather forecast is requested. + endpoint: + name: api_server + path: /weather + system_prompt: | + You are a helpful weather forecaster. Use weater data that is provided to you. 
Please following following guidelines when responding to user queries: + - Use farenheight for temperature + - Use miles per hour for wind speed + +ratelimits: + - model: gpt-4 + selector: + key: selector-key + value: selector-value + limit: + tokens: 1 + unit: minute +"# +} + +#[test] +#[serial] +fn successful_request_to_open_ai_chat_completions() { + let args = tester::MockSettings { + wasm_path: wasm_module(), + quiet: false, + allow_unexpected: false, + }; + let mut module = tester::mock(args).unwrap(); + + module + .call_start() + .execute_and_expect(ReturnType::None) + .unwrap(); + + // Setup Filter + let filter_context = setup_filter(&mut module, default_config()); + + // Setup HTTP Stream + let http_context = 2; + + module + .call_proxy_on_context_create(http_context, filter_context) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + request_headers_expectations(&mut module, http_context); + + // Request Body + let chat_completions_request_body = "\ + {\ + \"messages\": [\ + {\ + \"role\": \"system\",\ + \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\ + },\ + {\ + \"role\": \"user\",\ + \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ + }\ + ],\ + \"model\": \"gpt-4\"\ + }"; + + module + .call_proxy_on_request_body( + http_context, + chat_completions_request_body.len() as i32, + true, + ) + .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) + .returning(Some(chat_completions_request_body)) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call(Some("arch_internal"), None, None, None, None) + .returning(Some(4)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::Action(Action::Pause)) + .unwrap(); +} + +#[test] +#[serial] +fn bad_request_to_open_ai_chat_completions() { + let args = tester::MockSettings { + wasm_path: wasm_module(), + quiet: false, + 
allow_unexpected: false, + }; + let mut module = tester::mock(args).unwrap(); + + module + .call_start() + .execute_and_expect(ReturnType::None) + .unwrap(); + + // Setup Filter + let filter_context = setup_filter(&mut module, default_config()); + + // Setup HTTP Stream + let http_context = 2; + + module + .call_proxy_on_context_create(http_context, filter_context) + .expect_log(Some(LogLevel::Debug), None) + .execute_and_expect(ReturnType::None) + .unwrap(); + + request_headers_expectations(&mut module, http_context); + + // Request Body + let incomplete_chat_completions_request_body = "\ + {\ + \"messages\": [\ + {\ + \"role\": \"system\",\ + },\ + {\ + \"role\": \"user\",\ + \"content\": \"Compose a poem that explains the concept of recursion in programming.\"\ + }\ + ]\ + }"; + + module + .call_proxy_on_request_body( + http_context, + incomplete_chat_completions_request_body.len() as i32, + true, + ) + .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) + .returning(Some(incomplete_chat_completions_request_body)) + .expect_log(Some(LogLevel::Debug), None) + .expect_send_local_response( + Some(StatusCode::BAD_REQUEST.as_u16().into()), + None, + None, + None, + ) + .execute_and_expect(ReturnType::Action(Action::Pause)) + .unwrap(); +} + +#[test] +#[serial] +fn request_ratelimited() { + let args = tester::MockSettings { + wasm_path: wasm_module(), + quiet: false, + allow_unexpected: false, + }; + let mut module = tester::mock(args).unwrap(); + + module + .call_start() + .execute_and_expect(ReturnType::None) + .unwrap(); + + // Setup Filter + let filter_context = setup_filter(&mut module, default_config()); + + // Setup HTTP Stream + let http_context = 2; + + normal_flow(&mut module, filter_context, http_context); + + let arch_fc_resp = ChatCompletionsResponse { + usage: Some(Usage { + completion_tokens: 0, + }), + choices: vec![Choice { + finish_reason: "test".to_string(), + index: 0, + message: Message { + role: "system".to_string(), + content: None, + 
tool_calls: Some(vec![ToolCall { + id: String::from("test"), + tool_type: ToolType::Function, + function: FunctionCallDetail { + name: String::from("weather_forecast"), + arguments: HashMap::from([( + String::from("city"), + Value::String(String::from("seattle")), + )]), + }, + }]), + model: None, + }, + }], + model: String::from("test"), + metadata: None, + }; + + let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap(); + module + .call_proxy_on_http_call_response(http_context, 4, 0, arch_fc_resp_str.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&arch_fc_resp_str)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + (":path", "/hallucination"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(5)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let hallucatination_body = HallucinationClassificationResponse { + params_scores: HashMap::from([("city".to_string(), 0.99)]), + model: "nli-model".to_string(), + }; + + let body_text = serde_json::to_string(&hallucatination_body).unwrap(); + + module + .call_proxy_on_http_call_response(http_context, 5, 0, body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + 
Some(vec![ + ("x-arch-upstream", "api_server"), + (":method", "POST"), + (":path", "/weather"), + (":authority", "api_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ]), + None, + None, + None, + ) + .returning(Some(6)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let body_text = String::from("test body"); + module + .call_proxy_on_http_call_response(http_context, 6, 0, body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_get_header_map_value(Some(MapType::HttpCallResponseHeaders), Some(":status")) + .returning(Some("200")) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_send_local_response( + Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), + None, + None, + None, + ) + .expect_metric_increment("ratelimited_rq", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); +} + +#[test] +#[serial] +fn request_not_ratelimited() { + let args = tester::MockSettings { + wasm_path: wasm_module(), + quiet: false, + allow_unexpected: false, + }; + let mut module = tester::mock(args).unwrap(); + + module + .call_start() + .execute_and_expect(ReturnType::None) + .unwrap(); + + // Setup Filter + let mut config: Configuration = serde_yaml::from_str(default_config()).unwrap(); + config.ratelimits.as_mut().unwrap()[0].limit.tokens += 1000; + let config_str = serde_json::to_string(&config).unwrap(); + + let filter_context = setup_filter(&mut module, &config_str); + + // Setup HTTP Stream + let http_context = 2; + + normal_flow(&mut module, filter_context, http_context); + + let arch_fc_resp = ChatCompletionsResponse { + usage: Some(Usage { + completion_tokens: 0, + }), + 
choices: vec![Choice { + finish_reason: "test".to_string(), + index: 0, + message: Message { + role: "system".to_string(), + content: None, + tool_calls: Some(vec![ToolCall { + id: String::from("test"), + tool_type: ToolType::Function, + function: FunctionCallDetail { + name: String::from("weather_forecast"), + arguments: HashMap::from([( + String::from("city"), + Value::String(String::from("seattle")), + )]), + }, + }]), + model: None, + }, + }], + model: String::from("test"), + metadata: None, + }; + + let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap(); + module + .call_proxy_on_http_call_response(http_context, 4, 0, arch_fc_resp_str.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&arch_fc_resp_str)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "model_server"), + (":method", "POST"), + (":path", "/hallucination"), + (":authority", "model_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), + ]), + None, + None, + None, + ) + .returning(Some(5)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + // hallucination should return that parameters were not halliucinated + // prompt: str + // parameters: dict + // model: str + + let hallucatination_body = HallucinationClassificationResponse { + params_scores: HashMap::from([("city".to_string(), 0.99)]), + model: "nli-model".to_string(), + }; + + let body_text = serde_json::to_string(&hallucatination_body).unwrap(); + + module + .call_proxy_on_http_call_response(http_context, 5, 0, 
body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call( + Some("arch_internal"), + Some(vec![ + ("x-arch-upstream", "api_server"), + (":method", "POST"), + (":path", "/weather"), + (":authority", "api_server"), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ]), + None, + None, + None, + ) + .returning(Some(6)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let body_text = String::from("test body"); + module + .call_proxy_on_http_call_response(http_context, 6, 0, body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_get_header_map_value(Some(MapType::HttpCallResponseHeaders), Some(":status")) + .returning(Some("200")) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) + .execute_and_expect(ReturnType::None) + .unwrap(); +} diff --git a/gateway.code-workspace b/gateway.code-workspace index 617e49ec..ed15406b 100644 --- a/gateway.code-workspace +++ b/gateway.code-workspace @@ -5,8 +5,16 @@ "path": "." }, { - "name": "arch", - "path": "arch" + "name": "common", + "path": "crates/common" + }, + { + "name": "prompt_gateway", + "path": "crates/prompt_gateway" + }, + { + "name": "llm_gateway", + "path": "crates/llm_gateway" }, { "name": "arch/tools", diff --git a/public_types/Cargo.lock b/public_types/Cargo.lock deleted file mode 100644 index b253445b..00000000 --- a/public_types/Cargo.lock +++ /dev/null @@ -1,171 +0,0 @@ -# This file is automatically @generated by Cargo.
-# It is not intended for manual editing. -version = 3 - -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - -[[package]] -name = "duration-string" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fcc1d9ae294a15ed05aeae8e11ee5f2b3fe971c077d45a42fb20825fba6ee13" -dependencies = [ - "serde", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" - -[[package]] -name = "indexmap" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "pretty_assertions" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" -dependencies = [ - "diff", - "yansi", -] - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - 
"unicode-ident", -] - -[[package]] -name = "public_types" -version = "0.1.0" -dependencies = [ - "duration-string", - "pretty_assertions", - "serde", - "serde_json", - "serde_yaml", -] - -[[package]] -name = "quote" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" - -[[package]] -name = "serde" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.128" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.9.34+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" -dependencies = [ - "indexmap", - "itoa", - "ryu", - "serde", - "unsafe-libyaml", -] - -[[package]] -name = "syn" -version = "2.0.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.13" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" - -[[package]] -name = "unsafe-libyaml" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" - -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/public_types/src/lib.rs b/public_types/src/lib.rs deleted file mode 100644 index a7c191c8..00000000 --- a/public_types/src/lib.rs +++ /dev/null @@ -1,5 +0,0 @@ -#![allow(unused_imports)] - -pub mod common_types; -pub mod configuration; -pub mod embeddings;