From 662a840ac59dc2c1f6bce6b4414992c635da7b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Ulises=20Ni=C3=B1o=20Rivera?= Date: Mon, 28 Oct 2024 20:05:06 -0400 Subject: [PATCH] Add support for streaming and fixes few issues (see description) (#202) --- .github/workflows/checks.yml | 35 - .github/workflows/e2e_tests.yml | 32 + .github/workflows/model-server-tests.yml | 2 +- .github/workflows/rust_tests.yml | 33 + .gitignore | 1 + arch/Dockerfile | 6 +- arch/arch_config_schema.yaml | 1 - arch/build_filter_image.sh | 2 +- arch/docker-compose.dev.yaml | 16 +- arch/docker-compose.e2e.yaml | 17 + arch/docker-compose.yaml | 2 +- arch/envoy.template.yaml | 46 ++ arch/tools/cli/config_generator.py | 15 +- archgw.code-workspace | 4 + chatbot_ui/.vscode/launch.json | 16 +- chatbot_ui/app/arch_util.py | 20 + chatbot_ui/app/run.py | 107 ++- chatbot_ui/app/run_stream.py | 36 - crates/common/src/common_types.rs | 308 +++++++- crates/common/src/configuration.rs | 10 +- crates/common/src/consts.rs | 4 +- crates/common/src/errors.rs | 5 +- crates/common/src/tokenizer.rs | 14 +- crates/llm_gateway/src/stream_context.rs | 229 +++--- crates/llm_gateway/tests/integration.rs | 28 +- crates/prompt_gateway/src/filter_context.rs | 2 +- crates/prompt_gateway/src/hallucination.rs | 24 +- crates/prompt_gateway/src/http_context.rs | 222 +++--- crates/prompt_gateway/src/stream_context.rs | 194 +++-- crates/prompt_gateway/tests/integration.rs | 8 +- demos/function_calling/api_server/app/main.py | 6 +- demos/function_calling/arch_config.yaml | 25 +- demos/function_calling/docker-compose.yaml | 6 +- demos/llm_routing/arch_config.yaml | 32 + demos/llm_routing/docker-compose.yaml | 14 + e2e_tests/.vscode/settings.json | 7 + e2e_tests/README.md | 34 + e2e_tests/common.py | 42 ++ e2e_tests/common_scripts.sh | 33 + e2e_tests/poetry.lock | 702 ++++++++++++++++++ e2e_tests/pyproject.toml | 23 + e2e_tests/run_e2e_tests.sh | 80 ++ e2e_tests/test_llm_gateway.py | 36 + e2e_tests/test_prompt_gateway.py 
| 262 +++++++ model_server/app/cli.py | 2 +- 45 files changed, 2266 insertions(+), 477 deletions(-) delete mode 100644 .github/workflows/checks.yml create mode 100644 .github/workflows/e2e_tests.yml create mode 100644 .github/workflows/rust_tests.yml create mode 100644 arch/docker-compose.e2e.yaml create mode 100644 chatbot_ui/app/arch_util.py delete mode 100644 chatbot_ui/app/run_stream.py create mode 100644 demos/llm_routing/arch_config.yaml create mode 100644 demos/llm_routing/docker-compose.yaml create mode 100644 e2e_tests/.vscode/settings.json create mode 100644 e2e_tests/README.md create mode 100644 e2e_tests/common.py create mode 100644 e2e_tests/common_scripts.sh create mode 100644 e2e_tests/poetry.lock create mode 100644 e2e_tests/pyproject.toml create mode 100644 e2e_tests/run_e2e_tests.sh create mode 100644 e2e_tests/test_llm_gateway.py create mode 100644 e2e_tests/test_prompt_gateway.py diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml deleted file mode 100644 index ac33c76c..00000000 --- a/.github/workflows/checks.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Checks - -on: - pull_request: - push: - branches: [main] - -jobs: - test: - name: Test - runs-on: ubuntu-latest - steps: - - name: Setup | Checkout - uses: actions/checkout@v4 - - - name: Setup | Rust - run: rustup toolchain install stable --profile minimal - - - name: Setup | Install wasm toolchain - run: rustup target add wasm32-wasi - - - name: Run Tests on common crate - run: cd crates/common && cargo test - - - name: Build wasm module for prompt_gateway - run: cd crates/prompt_gateway && cargo build --release --target=wasm32-wasi - - - name: Run Tests on prompt_gateway crate - run: cd crates/prompt_gateway && cargo test - - - name: Build wasm module for llm_gateway - run: cd crates/llm_gateway && cargo build --release --target=wasm32-wasi - - - name: Run Tests on llm_gateway crate - run: cd crates/llm_gateway && cargo test diff --git a/.github/workflows/e2e_tests.yml 
b/.github/workflows/e2e_tests.yml new file mode 100644 index 00000000..2d53a6d6 --- /dev/null +++ b/.github/workflows/e2e_tests.yml @@ -0,0 +1,32 @@ +name: e2e tests + +on: + push: + branches: + - main # Run tests on pushes to the main branch + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + export PATH="$HOME/.local/bin:$PATH" + + - name: Run e2e tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} + run: | + cd e2e_tests && bash run_e2e_tests.sh diff --git a/.github/workflows/model-server-tests.yml b/.github/workflows/model-server-tests.yml index 1b33b5fc..64489d34 100644 --- a/.github/workflows/model-server-tests.yml +++ b/.github/workflows/model-server-tests.yml @@ -1,4 +1,4 @@ -name: Run Model Server tests +name: model server tests on: push: diff --git a/.github/workflows/rust_tests.yml b/.github/workflows/rust_tests.yml new file mode 100644 index 00000000..548e74e1 --- /dev/null +++ b/.github/workflows/rust_tests.yml @@ -0,0 +1,33 @@ +name: rust tests (prompt and llm gateway) + +on: + pull_request: + push: + branches: [main] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./crates + + steps: + - name: Setup | Checkout + uses: actions/checkout@v4 + + - name: Setup | Rust + run: rustup toolchain install stable --profile minimal + + - name: Setup | Install wasm toolchain + run: rustup target add wasm32-wasi + + - name: Build wasm module + run: cargo build --release --target=wasm32-wasi + + - name: Run unit tests + run: cargo test --lib + + - name: Run integration tests + run: cargo test --test integration diff --git a/.gitignore b/.gitignore index 9ed22f96..1a25cc1c 100644 --- a/.gitignore +++ b/.gitignore 
@@ -31,3 +31,4 @@ arch_logs/ dist/ crates/*/target/ crates/target/ +build.log diff --git a/arch/Dockerfile b/arch/Dockerfile index 073c0b6b..85721f58 100644 --- a/arch/Dockerfile +++ b/arch/Dockerfile @@ -12,6 +12,9 @@ FROM envoyproxy/envoy:v1.31-latest as envoy #Build config generator, so that we have a single build image for both Rust and Python FROM python:3-slim as arch + +RUN apt-get update && apt-get install -y gettext-base && apt-get clean && rm -rf /var/lib/apt/lists/* + COPY --from=builder /arch/target/wasm32-wasi/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm COPY --from=builder /arch/target/wasm32-wasi/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy @@ -22,4 +25,5 @@ COPY arch/tools/cli/config_generator.py . COPY arch/envoy.template.yaml . COPY arch/arch_config_schema.yaml . -CMD ["sh", "-c", "python config_generator.py && envoy -c /etc/envoy/envoy.yaml --component-log-level wasm:debug"] + +ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"] diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 9b63840e..142fe338 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -160,4 +160,3 @@ required: - version - listener - llm_providers - - prompt_targets diff --git a/arch/build_filter_image.sh b/arch/build_filter_image.sh index a0b6f55b..75ac81ce 100644 --- a/arch/build_filter_image.sh +++ b/arch/build_filter_image.sh @@ -1 +1 @@ -docker build -t archgw .. -f Dockerfile +docker build -f Dockerfile .. 
-t katanemo/archgw diff --git a/arch/docker-compose.dev.yaml b/arch/docker-compose.dev.yaml index 36c364bb..fdf024c6 100644 --- a/arch/docker-compose.dev.yaml +++ b/arch/docker-compose.dev.yaml @@ -1,6 +1,6 @@ services: archgw: - image: archgw:latest + image: katanemo/archgw:latest ports: - "10000:10000" - "11000:11000" @@ -10,9 +10,13 @@ services: - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml - /etc/ssl/cert.pem:/etc/ssl/cert.pem - ./envoy.template.yaml:/config/envoy.template.yaml - - ./target/wasm32-wasi/release/intelligent_prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm - ./arch_config_schema.yaml:/config/arch_config_schema.yaml - - ./tools/config_generator.py:/config/config_generator.py - - ./arch_logs:/var/log/ - env_file: - - stage.env + - ./tools/cli/config_generator.py:/config/config_generator.py + - ../crates/target/wasm32-wasi/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm + - ../crates/target/wasm32-wasi/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm + - ~/archgw_logs:/var/log/ + extra_hosts: + - "host.docker.internal:host-gateway" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY:?error} + - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} diff --git a/arch/docker-compose.e2e.yaml b/arch/docker-compose.e2e.yaml new file mode 100644 index 00000000..ebff459e --- /dev/null +++ b/arch/docker-compose.e2e.yaml @@ -0,0 +1,17 @@ +services: + archgw: + image: katanemo/archgw:latest + ports: + - "10000:10000" + - "11000:11000" + - "12000:12000" + - "19901:9901" + volumes: + - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml + - /etc/ssl/cert.pem:/etc/ssl/cert.pem + - ~/archgw_logs:/var/log/ + extra_hosts: + - "host.docker.internal:host-gateway" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY:?error} + - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} diff --git a/arch/docker-compose.yaml 
b/arch/docker-compose.yaml index 0a2e5a99..78345547 100644 --- a/arch/docker-compose.yaml +++ b/arch/docker-compose.yaml @@ -7,7 +7,7 @@ services: - "12000:12000" - "19901:9901" volumes: - - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml + - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml - /etc/ssl/cert.pem:/etc/ssl/cert.pem - ~/archgw_logs:/var/log/ env_file: diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 14e26e84..a1ed4472 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -52,6 +52,15 @@ static_resources: cluster: arch_llm_listener timeout: 60s http_filters: + - name: envoy.filters.http.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor + compressor_library: + name: compress + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip + memory_level: 3 + window_bits: 10 - name: envoy.filters.http.wasm typed_config: "@type": type.googleapis.com/udpa.type.v1.TypedStruct @@ -69,6 +78,17 @@ static_resources: code: local: filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm" + - name: envoy.filters.http.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor + decompressor_library: + name: decompress + typed_config: + "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip" + window_bits: 9 + chunk_size: 8192 + # If this ratio is set too low, then body data will not be decompressed completely. 
+ max_inflate_ratio: 1000 - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router @@ -187,6 +207,12 @@ static_resources: domains: - "*" routes: + - match: + prefix: "/healthz" + route: + auto_host_rewrite: true + cluster: openai + timeout: 60s {% for provider in arch_llm_providers %} - match: prefix: "/" @@ -206,6 +232,15 @@ static_resources: body: inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n" http_filters: + - name: envoy.filters.http.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor + compressor_library: + name: compress + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip + memory_level: 3 + window_bits: 10 - name: envoy.filters.http.wasm typed_config: "@type": type.googleapis.com/udpa.type.v1.TypedStruct @@ -223,6 +258,17 @@ static_resources: code: local: filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm" + - name: envoy.filters.http.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor + decompressor_library: + name: decompress + typed_config: + "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip" + window_bits: 9 + chunk_size: 8192 + # If this ratio is set too low, then body data will not be decompressed completely. 
+ max_inflate_ratio: 1000 - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 33741ee9..1e5fd4a3 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -47,13 +47,14 @@ def validate_and_render_schema(): config_schema_yaml = yaml.safe_load(arch_config_schema) inferred_clusters = {} - for prompt_target in config_yaml["prompt_targets"]: - name = prompt_target.get("endpoint", {}).get("name", "") - if name not in inferred_clusters: - inferred_clusters[name] = { - "name": name, - "port": 80, # default port - } + if "prompt_targets" in config_yaml: + for prompt_target in config_yaml["prompt_targets"]: + name = prompt_target.get("endpoint", {}).get("name", "") + if name not in inferred_clusters: + inferred_clusters[name] = { + "name": name, + "port": 80, # default port + } print(inferred_clusters) endpoints = config_yaml.get("endpoints", {}) diff --git a/archgw.code-workspace b/archgw.code-workspace index 9148057d..230e2225 100644 --- a/archgw.code-workspace +++ b/archgw.code-workspace @@ -20,6 +20,10 @@ "name": "chatbot_ui", "path": "chatbot_ui" }, + { + "name": "e2e_tests", + "path": "e2e_tests" + }, { "name": "demos/function_calling", "path": "./demos/function_calling", diff --git a/chatbot_ui/.vscode/launch.json b/chatbot_ui/.vscode/launch.json index 8b42a191..2064a252 100644 --- a/chatbot_ui/.vscode/launch.json +++ b/chatbot_ui/.vscode/launch.json @@ -14,7 +14,9 @@ "console": "integratedTerminal", "env": { "LLM": "1", - "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1" + "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1", + "STREAMING": "True", + "ARCH_CONFIG": "../../demos/function_calling/arch_config.yaml" } }, { @@ -29,17 +31,5 @@ "CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1" } }, - { - "name": "chatbot-ui streaming", - "cwd": 
"${workspaceFolder}/app", - "type": "debugpy", - "request": "launch", - "program": "run_stream.py", - "console": "integratedTerminal", - "env": { - "LLM": "1", - "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1" - } - } ] } diff --git a/chatbot_ui/app/arch_util.py b/chatbot_ui/app/arch_util.py new file mode 100644 index 00000000..567640e5 --- /dev/null +++ b/chatbot_ui/app/arch_util.py @@ -0,0 +1,20 @@ +import json + + +ARCH_STATE_HEADER = "x-arch-state" + + +def get_arch_messages(response_json): + arch_messages = [] + if response_json and "metadata" in response_json: + # load arch_state from metadata + arch_state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}") + # parse arch_state into json object + arch_state = json.loads(arch_state_str) + # load messages from arch_state + arch_messages_str = arch_state.get("messages", "[]") + # parse messages into json object + arch_messages = json.loads(arch_messages_str) + # append messages from arch gateway to history + return arch_messages + return [] diff --git a/chatbot_ui/app/run.py b/chatbot_ui/app/run.py index 05a6a6db..b0d5acc6 100644 --- a/chatbot_ui/app/run.py +++ b/chatbot_ui/app/run.py @@ -2,14 +2,17 @@ import json import os import logging import yaml +from arch_util import get_arch_messages import gradio as gr from typing import List, Optional, Tuple -from openai import OpenAI, DefaultHttpxClient +from openai import OpenAI from dotenv import load_dotenv load_dotenv() +STREAM_RESPONSE = bool(os.getenv("STREAM_RESPOSE", True)) + logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", @@ -20,7 +23,6 @@ log = logging.getLogger(__name__) CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT") log.info(f"CHAT_COMPLETION_ENDPOINT: {CHAT_COMPLETION_ENDPOINT}") -ARCH_STATE_HEADER = "x-arch-state" CSS_STYLE = """ .json-container { @@ -37,7 +39,7 @@ footer {visibility: hidden} client = OpenAI( api_key="--", base_url=CHAT_COMPLETION_ENDPOINT, - 
http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}), + # http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}), ) @@ -69,7 +71,7 @@ def convert_prompt_target_to_openai_format(target): def get_prompt_targets(): try: - with open("arch_config.yaml", "r") as file: + with open(os.getenv("ARCH_CONFIG", "arch_config.yaml"), "r") as file: config = yaml.safe_load(file) available_tools = [] @@ -105,48 +107,85 @@ def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], st temperature=1.0, # metadata=metadata, extra_headers=custom_headers, + stream=STREAM_RESPONSE, ) except Exception as e: log.info(e) # remove last user message in case of exception history.pop() - log.info("Error calling gateway API: {}".format(e.message)) - raise gr.Error("Error calling gateway API: {}".format(e.message)) + log.info("Error calling gateway API: {}".format(e)) + raise gr.Error("Error calling gateway API: {}".format(e)) - log.error(f"raw_response: {raw_response.text}") - response = raw_response.parse() + if STREAM_RESPONSE: + response = raw_response.parse() + history.append({"role": "assistant", "content": "", "model": ""}) + # for gradio UI we don't want to show raw tool calls and messages from developer application + # so we're filtering those out + history_view = [h for h in history if h["role"] != "tool" and "content" in h] - # extract arch_state from metadata and store it in gradio session state - # this state must be passed back to the gateway in the next request - response_json = json.loads(raw_response.text) - log.info(response_json) - if response_json and "metadata" in response_json: - # load arch_state from metadata - arch_state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}") - # parse arch_state into json object - arch_state = json.loads(arch_state_str) - # load messages from arch_state - arch_messages_str = arch_state.get("messages", "[]") - # parse messages into json object - arch_messages = 
json.loads(arch_messages_str) - # append messages from arch gateway to history - for message in arch_messages: - history.append(message) + messages = [ + (history_view[i]["content"], history_view[i + 1]["content"]) + for i in range(0, len(history_view) - 1, 2) + ] - content = response.choices[0].message.content + for chunk in response: + if len(chunk.choices) > 0: + if chunk.choices[0].delta.role: + if history[-1]["role"] != chunk.choices[0].delta.role: + history.append( + { + "role": chunk.choices[0].delta.role, + "content": chunk.choices[0].delta.content, + "model": chunk.model, + "tool_calls": chunk.choices[0].delta.tool_calls, + } + ) - history.append({"role": "assistant", "content": content, "model": response.model}) + history[-1]["model"] = chunk.model + if chunk.choices[0].delta.content: + if not history[-1]["content"]: + history[-1]["content"] = "" + history[-1]["content"] = ( + history[-1]["content"] + chunk.choices[0].delta.content + ) + if chunk.choices[0].delta.tool_calls: + history[-1]["tool_calls"] = chunk.choices[0].delta.tool_calls - # for gradio UI we don't want to show raw tool calls and messages from developer application - # so we're filtering those out - history_view = [h for h in history if h["role"] != "tool" and "content" in h] + if chunk.model and chunk.choices[0].delta.content: + messages[-1] = ( + messages[-1][0], + messages[-1][1] + chunk.choices[0].delta.content, + ) + yield "", messages, state + else: + log.error(f"raw_response: {raw_response.text}") + response = raw_response.parse() - messages = [ - (history_view[i]["content"], history_view[i + 1]["content"]) - for i in range(0, len(history_view) - 1, 2) - ] + # extract arch_state from metadata and store it in gradio session state + # this state must be passed back to the gateway in the next request + response_json = json.loads(raw_response.text) + log.info(response_json) - return "", messages, state + arch_messages = get_arch_messages(response_json) + for arch_message in 
arch_messages: + history.append(arch_message) + + content = response.choices[0].message.content + + history.append( + {"role": "assistant", "content": content, "model": response.model} + ) + + # for gradio UI we don't want to show raw tool calls and messages from developer application + # so we're filtering those out + history_view = [h for h in history if h["role"] != "tool" and "content" in h] + + messages = [ + (history_view[i]["content"], history_view[i + 1]["content"]) + for i in range(0, len(history_view) - 1, 2) + ] + + yield "", messages, state def main(): diff --git a/chatbot_ui/app/run_stream.py b/chatbot_ui/app/run_stream.py deleted file mode 100644 index 8be5a16b..00000000 --- a/chatbot_ui/app/run_stream.py +++ /dev/null @@ -1,36 +0,0 @@ -# copied from https://www.gradio.app/guides/creating-a-chatbot-fast#a-streaming-example-using-openai - -import os -from openai import OpenAI -import gradio as gr - -api_key = os.getenv("OPENAI_API_KEY") -CHAT_COMPLETION_ENDPOINT = os.getenv( - "CHAT_COMPLETION_ENDPOINT", "https://api.openai.com/v1" -) - -client = OpenAI(api_key=api_key, base_url=CHAT_COMPLETION_ENDPOINT) - - -def predict(message, history): - history_openai_format = [] - for human, assistant in history: - history_openai_format.append({"role": "user", "content": human}) - history_openai_format.append({"role": "assistant", "content": assistant}) - history_openai_format.append({"role": "user", "content": message}) - - response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=history_openai_format, - temperature=1.0, - stream=True, - ) - - partial_message = "" - for chunk in response: - if chunk.choices[0].delta.content is not None: - partial_message = partial_message + chunk.choices[0].delta.content - yield partial_message - - -gr.ChatInterface(predict).launch(server_name="0.0.0.0", server_port=8081) diff --git a/crates/common/src/common_types.rs b/crates/common/src/common_types.rs index c8f91e0f..35404096 100644 --- 
a/crates/common/src/common_types.rs +++ b/crates/common/src/common_types.rs @@ -34,11 +34,16 @@ pub struct SearchPointResult { } pub mod open_ai { - use std::collections::HashMap; + use std::{ + collections::{HashMap, VecDeque}, + fmt::Display, + }; use serde::{ser::SerializeMap, Deserialize, Serialize}; use serde_yaml::Value; + use crate::consts::{ARCH_FC_MODEL_NAME, ASSISTANT_ROLE}; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChatCompletionsRequest { #[serde(default)] @@ -182,12 +187,16 @@ pub mod open_ai { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { pub role: String, + #[serde(skip_serializing_if = "Option::is_none")] pub content: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub model: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub tool_calls: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub tool_call_id: Option, } @@ -235,17 +244,116 @@ pub mod open_ai { pub metadata: Option>, } + impl ChatCompletionsResponse { + pub fn new(message: String) -> Self { + ChatCompletionsResponse { + choices: vec![Choice { + message: Message { + role: ASSISTANT_ROLE.to_string(), + content: Some(message), + model: Some(ARCH_FC_MODEL_NAME.to_string()), + tool_calls: None, + tool_call_id: None, + }, + index: 0, + finish_reason: "done".to_string(), + }], + usage: None, + model: ARCH_FC_MODEL_NAME.to_string(), + metadata: None, + } + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Usage { pub completion_tokens: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] - pub struct ChatCompletionChunkResponse { - pub model: String, + pub struct ChatCompletionStreamResponse { + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, pub choices: Vec, } + impl ChatCompletionStreamResponse { + pub fn new( + response: Option, + role: Option, + model: Option, + tool_calls: Option>, + ) -> Self { + ChatCompletionStreamResponse { + model, + choices: vec![ChunkChoice { + 
delta: Delta { + role, + content: response, + tool_calls, + model: None, + tool_call_id: None, + }, + finish_reason: None, + }], + } + } + } + + #[derive(Debug, thiserror::Error)] + pub enum ChatCompletionChunkResponseError { + #[error("failed to deserialize")] + Deserialization(#[from] serde_json::Error), + #[error("empty content in data chunk")] + EmptyContent, + #[error("no chunks present")] + NoChunks, + } + + pub struct ChatCompletionStreamResponseServerEvents { + pub events: Vec, + } + + impl Display for ChatCompletionStreamResponseServerEvents { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let tokens_str = self + .events + .iter() + .map(|response_chunk| { + if response_chunk.choices.is_empty() { + return "".to_string(); + } + response_chunk.choices[0] + .delta + .content + .clone() + .unwrap_or("".to_string()) + }) + .collect::>() + .join(""); + + write!(f, "{}", tokens_str) + } + } + + impl TryFrom<&str> for ChatCompletionStreamResponseServerEvents { + type Error = ChatCompletionChunkResponseError; + + fn try_from(value: &str) -> Result { + let response_chunks: VecDeque = value + .lines() + .filter(|line| line.starts_with("data: ")) + .map(|line| line.get(6..).unwrap()) + .filter(|data_chunk| *data_chunk != "[DONE]") + .map(serde_json::from_str::) + .collect::, _>>()?; + + Ok(ChatCompletionStreamResponseServerEvents { + events: response_chunks.into(), + }) + } + } + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChunkChoice { pub delta: Delta, @@ -255,7 +363,30 @@ pub mod open_ai { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Delta { + #[serde(skip_serializing_if = "Option::is_none")] + pub role: Option, + + #[serde(skip_serializing_if = "Option::is_none")] pub content: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_calls: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_call_id: 
Option, + } + + pub fn to_server_events(chunks: Vec) -> String { + let mut response_str = String::new(); + for chunk in chunks.iter() { + response_str.push_str("data: "); + response_str.push_str(&serde_json::to_string(&chunk).unwrap()); + response_str.push_str("\n\n"); + } + response_str } } @@ -313,7 +444,7 @@ pub struct PromptGuardResponse { #[cfg(test)] mod test { - use crate::common_types::open_ai::Message; + use crate::common_types::open_ai::{ChatCompletionStreamResponseServerEvents, Message}; use pretty_assertions::{assert_eq, assert_ne}; use std::collections::HashMap; @@ -448,4 +579,173 @@ mod test { ParameterType::String ); } + + #[test] + fn stream_chunk_parse() { + use super::open_ai::{ChatCompletionStreamResponse, ChunkChoice, Delta}; + + const CHUNK_RESPONSE: &str = r#"data: {"id":"chatcmpl-ALmdmtKulBMEq3fRLbrnxJwcKOqvS","object":"chat.completion.chunk","created":1729755226,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALmdmtKulBMEq3fRLbrnxJwcKOqvS","object":"chat.completion.chunk","created":1729755226,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALmdmtKulBMEq3fRLbrnxJwcKOqvS","object":"chat.completion.chunk","created":1729755226,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALmdmtKulBMEq3fRLbrnxJwcKOqvS","object":"chat.completion.chunk","created":1729755226,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ALmdmtKulBMEq3fRLbrnxJwcKOqvS","object":"chat.completion.chunk","created":1729755226,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}]} + + +"#; + + let sever_events = + ChatCompletionStreamResponseServerEvents::try_from(CHUNK_RESPONSE).unwrap(); + assert_eq!(sever_events.events.len(), 5); + assert_eq!( + sever_events.events[0].choices[0] + .delta + .content + .as_ref() + .unwrap(), + "" + ); + assert_eq!( + sever_events.events[1].choices[0] + .delta + .content + .as_ref() + .unwrap(), + "Hello" + ); + assert_eq!( + sever_events.events[2].choices[0] + .delta + .content + .as_ref() + .unwrap(), + "!" + ); + assert_eq!( + sever_events.events[3].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " How" + ); + assert_eq!( + sever_events.events[4].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " can" + ); + assert_eq!(sever_events.to_string(), "Hello! How can"); + } + + #[test] + fn stream_chunk_parse_done() { + use super::open_ai::{ChatCompletionStreamResponse, ChunkChoice, Delta}; + + const CHUNK_RESPONSE: &str = r#"data: {"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ALn2KTfmrIpYd9N3Un4Kyg08WIIP6","object":"chat.completion.chunk","created":1729756748,"model":"gpt-3.5-turbo-0125","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: [DONE] +"#; + + let sever_events: ChatCompletionStreamResponseServerEvents = + ChatCompletionStreamResponseServerEvents::try_from(CHUNK_RESPONSE).unwrap(); + assert_eq!(sever_events.events.len(), 6); + assert_eq!( + sever_events.events[0].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " I" + ); + assert_eq!( + sever_events.events[1].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " assist" + ); + assert_eq!( + sever_events.events[2].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " you" + ); + assert_eq!( + sever_events.events[3].choices[0] + .delta + .content + .as_ref() + .unwrap(), + " today" + ); + assert_eq!( + sever_events.events[4].choices[0] + .delta + .content + .as_ref() + .unwrap(), + "?" 
+ ); + assert_eq!(sever_events.events[5].choices[0].delta.content, None); + + assert_eq!(sever_events.to_string(), " I assist you today?"); + } + + #[test] + fn stream_chunk_parse_mistral() { + use super::open_ai::{ChatCompletionStreamResponse, ChunkChoice, Delta}; + + const CHUNK_RESPONSE: &str = r#"data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" How"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" can"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" I"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" assist"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" you"},"finish_reason":null}]} + +data: 
{"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":" today"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":"?"},"finish_reason":null}]} + +data: {"id":"e1ebce16de5443b79613512c2d757936","object":"chat.completion.chunk","created":1729805261,"model":"ministral-8b-latest","choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"total_tokens":13,"completion_tokens":9}} + +data: [DONE] +"#; + + let sever_events: ChatCompletionStreamResponseServerEvents = + ChatCompletionStreamResponseServerEvents::try_from(CHUNK_RESPONSE).unwrap(); + assert_eq!(sever_events.events.len(), 11); + + assert_eq!( + sever_events.to_string(), + "Hello! How can I assist you today?" + ); + } } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 293dad09..ef57845a 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -27,12 +27,12 @@ pub enum GatewayMode { pub struct Configuration { pub version: String, pub listener: Listener, - pub endpoints: HashMap, + pub endpoints: Option>, pub llm_providers: Vec, pub overrides: Option, pub system_prompt: Option, pub prompt_guards: Option, - pub prompt_targets: Vec, + pub prompt_targets: Option>, pub error_target: Option, pub ratelimits: Option>, pub tracing: Option, @@ -246,8 +246,10 @@ mod test { ); let prompt_targets = &config.prompt_targets; - assert_eq!(prompt_targets.len(), 2); + assert_eq!(prompt_targets.as_ref().unwrap().len(), 2); let prompt_target = prompt_targets + .as_ref() + .unwrap() .iter() .find(|p| p.name == "reboot_network_device") .unwrap(); @@ -255,6 +257,8 @@ mod test { assert_eq!(prompt_target.default, None); let prompt_target = prompt_targets + .as_ref() + 
.unwrap() .iter() .find(|p| p.name == "information_extraction") .unwrap(); diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 81df31f8..f782cf99 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -18,6 +18,7 @@ pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; +pub const HEALTHZ_PATH: &str = "/healthz"; pub const ARCH_STATE_HEADER: &str = "x-arch-state"; pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B"; pub const REQUEST_ID_HEADER: &str = "x-request-id"; @@ -25,4 +26,5 @@ pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal"; pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream"; pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener"; pub const ARCH_MODEL_PREFIX: &str = "Arch"; -pub const HALLUCINATION_TEMPLATE: &str = "It seems I’m missing some information. Could you provide the following details "; +pub const HALLUCINATION_TEMPLATE: &str = + "It seems I'm missing some information. 
Could you provide the following details "; diff --git a/crates/common/src/errors.rs b/crates/common/src/errors.rs index 27b0341e..6808793d 100644 --- a/crates/common/src/errors.rs +++ b/crates/common/src/errors.rs @@ -1,6 +1,7 @@ use proxy_wasm::types::Status; +use serde_json::error; -use crate::ratelimit; +use crate::{common_types::open_ai::ChatCompletionChunkResponseError, ratelimit}; #[derive(thiserror::Error, Debug)] pub enum ClientError { @@ -37,4 +38,6 @@ pub enum ServerError { ExceededRatelimit(ratelimit::Error), #[error("{why}")] BadRequest { why: String }, + #[error("error in streaming response")] + Streaming(#[from] ChatCompletionChunkResponseError), } diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index 25ac924e..aa0870f2 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -1,17 +1,19 @@ use log::debug; -#[derive(Debug, PartialEq, Eq)] +#[derive(thiserror::Error, Debug, PartialEq, Eq)] #[allow(dead_code)] pub enum Error { - UnknownModel, - FailedToTokenize, + #[error("Unknown model: {model_name}")] + UnknownModel { model_name: String }, } #[allow(dead_code)] pub fn token_count(model_name: &str, text: &str) -> Result { debug!("getting token count model={}", model_name); // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton? 
- let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel)?; + let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel { + model_name: model_name.to_string(), + })?; Ok(bpe.encode_ordinary(text).len()) } @@ -32,7 +34,9 @@ mod test { #[test] fn unrecognized_model() { assert_eq!( - Error::UnknownModel, + Error::UnknownModel { + model_name: "unknown".to_string() + }, token_count("unknown", "").expect_err("unknown model") ) } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index bd2fba5e..c0e1212c 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -1,23 +1,21 @@ use crate::filter_context::WasmMetrics; use common::common_types::open_ai::{ - ArchState, ChatCompletionChunkResponse, ChatCompletionsRequest, ChatCompletionsResponse, - Message, ToolCall, ToolCallState, + ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse, + StreamOptions, }; use common::configuration::LlmProvider; use common::consts::{ - ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, ARCH_STATE_HEADER, CHAT_COMPLETIONS_PATH, - RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, USER_ROLE, + ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, + RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, }; use common::errors::ServerError; use common::llm_providers::LlmProviders; use common::ratelimit::Header; use common::{ratelimit, routing, tokenizer}; use http::StatusCode; -use log::debug; +use log::{debug, trace, warn}; use proxy_wasm::traits::*; use proxy_wasm::types::*; -use serde_json::Value; -use sha2::{Digest, Sha256}; use std::num::NonZero; use std::rc::Rc; @@ -26,15 +24,10 @@ use common::stats::IncrementingMetric; pub struct StreamContext { context_id: u32, metrics: Rc, - tool_calls: Option>, - tool_call_response: Option, - arch_state: Option>, ratelimit_selector: Option
, streaming_response: bool, - user_prompt: Option, response_tokens: usize, is_chat_completions_request: bool, - chat_completions_request: Option, llm_providers: Rc, llm_provider: Option>, request_id: Option, @@ -45,13 +38,8 @@ impl StreamContext { StreamContext { context_id, metrics, - chat_completions_request: None, - tool_calls: None, - tool_call_response: None, - arch_state: None, ratelimit_selector: None, streaming_response: false, - user_prompt: None, response_tokens: 0, is_chat_completions_request: false, llm_providers, @@ -223,6 +211,21 @@ impl HttpContext for StreamContext { .clone_from(&self.llm_provider.as_ref().unwrap().model); let chat_completion_request_str = serde_json::to_string(&deserialized_body).unwrap(); + trace!( + "arch => {:?}, body: {}", + deserialized_body.model, + chat_completion_request_str + ); + + if deserialized_body.stream { + self.streaming_response = true; + } + if deserialized_body.stream && deserialized_body.stream_options.is_none() { + deserialized_body.stream_options = Some(StreamOptions { + include_usage: true, + }); + } + // enforce ratelimits on ingress if let Err(e) = self.enforce_ratelimits(&deserialized_body.model, &chat_completion_request_str) @@ -235,10 +238,6 @@ impl HttpContext for StreamContext { return Action::Continue; } - debug!( - "arch => {:?}, body: {}", - deserialized_body.model, chat_completion_request_str - ); self.set_http_request_body(0, body_size, chat_completion_request_str.as_bytes()); Action::Continue @@ -246,78 +245,112 @@ impl HttpContext for StreamContext { fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { debug!( - "recv [S={}] bytes={} end_stream={}", + "on_http_response_body [S={}] bytes={} end_stream={}", self.context_id, body_size, end_of_stream ); if !self.is_chat_completions_request { - if let Some(body_str) = self - .get_http_response_body(0, body_size) - .and_then(|bytes| String::from_utf8(bytes).ok()) - { - debug!("recv [S={}] body_str={}", 
self.context_id, body_str); - } + debug!("non-chatcompletion request"); return Action::Continue; } - if !end_of_stream { - return Action::Pause; - } - - let body = self - .get_http_response_body(0, body_size) - .expect("cant get response body"); - - if self.streaming_response { - let body_str = String::from_utf8(body).expect("body is not utf-8"); - debug!("streaming response"); - let chat_completions_data = match body_str.split_once("data: ") { - Some((_, chat_completions_data)) => chat_completions_data, + let body = if self.streaming_response { + if end_of_stream && body_size == 0 { + return Action::Continue; + } + let chunk_start = 0; + let chunk_size = body_size; + debug!( + "streaming response reading, {}..{}", + chunk_start, chunk_size + ); + let streaming_chunk = match self.get_http_response_body(0, chunk_size) { + Some(chunk) => chunk, None => { - self.send_server_error( - ServerError::LogicError(String::from("parsing error in streaming data")), - None, + warn!( + "response body empty, chunk_start: {}, chunk_size: {}", + chunk_start, chunk_size ); - return Action::Pause; + return Action::Continue; } }; - let chat_completions_chunk_response: ChatCompletionChunkResponse = - match serde_json::from_str(chat_completions_data) { - Ok(de) => de, - Err(_) => { - if chat_completions_data != "[NONE]" { - self.send_server_error( - ServerError::LogicError(String::from( - "error in streaming response", - )), - None, - ); - return Action::Continue; - } + if streaming_chunk.len() != chunk_size { + warn!( + "chunk size mismatch: read: {} != requested: {}", + streaming_chunk.len(), + chunk_size + ); + } + streaming_chunk + } else { + debug!("non streaming response bytes read: 0:{}", body_size); + match self.get_http_response_body(0, body_size) { + Some(body) => body, + None => { + warn!("non streaming response body empty"); + return Action::Continue; + } + } + }; + + let body_utf8 = match String::from_utf8(body) { + Ok(body_utf8) => body_utf8, + Err(e) => { + debug!("could 
not convert to utf8: {}", e); + return Action::Continue; + } + }; + + if self.streaming_response { + let chat_completions_chunk_response_events = + match ChatCompletionStreamResponseServerEvents::try_from(body_utf8.as_str()) { + Ok(response) => response, + Err(e) => { + debug!( + "invalid streaming response: body str: {}, {:?}", + body_utf8, e + ); return Action::Continue; } }; - if let Some(content) = chat_completions_chunk_response - .choices + if chat_completions_chunk_response_events.events.is_empty() { + debug!("empty streaming response"); + return Action::Continue; + } + + let mut model = chat_completions_chunk_response_events + .events .first() .unwrap() - .delta - .content - .as_ref() + .model + .clone(); + let tokens_str = chat_completions_chunk_response_events.to_string(); + //HACK: add support for tokenizing mistral and other models + //filed issue https://github.com/katanemo/arch/issues/222 + if model.as_ref().unwrap().starts_with("mistral") + || model.as_ref().unwrap().starts_with("ministral") { - let model = &chat_completions_chunk_response.model; - let token_count = tokenizer::token_count(model, content).unwrap_or(0); - self.response_tokens += token_count; + model = Some("gpt-4".to_string()); } + let token_count = + match tokenizer::token_count(model.as_ref().unwrap().as_str(), tokens_str.as_str()) + { + Ok(token_count) => token_count, + Err(e) => { + debug!("could not get token count: {:?}", e); + return Action::Continue; + } + }; + self.response_tokens += token_count; } else { debug!("non streaming response"); let chat_completions_response: ChatCompletionsResponse = - match serde_json::from_slice(&body) { + match serde_json::from_str(body_utf8.as_str()) { Ok(de) => de, Err(_e) => { - debug!("invalid response: {}", String::from_utf8_lossy(&body)); + debug!("invalid response: {}", body_utf8); return Action::Continue; } }; @@ -329,65 +362,6 @@ impl HttpContext for StreamContext { .unwrap() .completion_tokens; } - - if let Some(tool_calls) = 
self.tool_calls.as_ref() { - if !tool_calls.is_empty() { - if self.arch_state.is_none() { - self.arch_state = Some(Vec::new()); - } - - // compute sha hash from message history - let mut hasher = Sha256::new(); - let prompts: Vec = self - .chat_completions_request - .as_ref() - .unwrap() - .messages - .iter() - .filter(|msg| msg.role == USER_ROLE) - .map(|msg| msg.content.clone().unwrap()) - .collect(); - let prompts_merged = prompts.join("#.#"); - hasher.update(prompts_merged.clone()); - let hash_key = hasher.finalize(); - // conver hash to hex string - let hash_key_str = format!("{:x}", hash_key); - debug!("hash key: {}, prompts: {}", hash_key_str, prompts_merged); - - // create new tool call state - let tool_call_state = ToolCallState { - key: hash_key_str, - message: self.user_prompt.clone(), - tool_call: tool_calls[0].function.clone(), - tool_response: self.tool_call_response.clone().unwrap(), - }; - - // push tool call state to arch state - self.arch_state - .as_mut() - .unwrap() - .push(ArchState::ToolCall(vec![tool_call_state])); - - let mut data: Value = serde_json::from_slice(&body).unwrap(); - // use serde::Value to manipulate the json object and ensure that we don't lose any data - if let Value::Object(ref mut map) = data { - // serialize arch state and add to metadata - let arch_state_str = serde_json::to_string(&self.arch_state).unwrap(); - debug!("arch_state: {}", arch_state_str); - let metadata = map - .entry("metadata") - .or_insert(Value::Object(serde_json::Map::new())); - metadata.as_object_mut().unwrap().insert( - ARCH_STATE_HEADER.to_string(), - serde_json::Value::String(arch_state_str), - ); - - let data_serialized = serde_json::to_string(&data).unwrap(); - debug!("arch => user: {}", data_serialized); - self.set_http_response_body(0, body_size, data_serialized.as_bytes()); - }; - } - } } debug!( @@ -395,7 +369,6 @@ impl HttpContext for StreamContext { self.context_id, self.response_tokens, end_of_stream ); - // TODO:: ratelimit based on 
response tokens. Action::Continue } } diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 7ec92ccd..5a5ed4a8 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -149,14 +149,14 @@ ratelimits: key: selector-key value: selector-value limit: - tokens: 50 + tokens: 100 unit: minute "# } #[test] #[serial] -fn successful_request_to_open_ai_chat_completions() { +fn llm_gateway_successful_request_to_open_ai_chat_completions() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -207,7 +207,7 @@ fn successful_request_to_open_ai_chat_completions() { ) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) @@ -217,7 +217,7 @@ fn successful_request_to_open_ai_chat_completions() { #[test] #[serial] -fn bad_request_to_open_ai_chat_completions() { +fn llm_gateway_bad_request_to_open_ai_chat_completions() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -279,7 +279,7 @@ fn bad_request_to_open_ai_chat_completions() { #[test] #[serial] -fn request_ratelimited() { +fn llm_gateway_request_ratelimited() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -306,11 +306,11 @@ fn request_ratelimited() { \"messages\": [\ {\ \"role\": \"system\",\ - \"content\": \"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"\ + \"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. 
Be sure to be concise and to the point.\"\ },\ {\ \"role\": \"user\",\ - \"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. \"\ + \"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"\ }\ ],\ \"model\": \"gpt-4\"\ @@ -325,6 +325,7 @@ fn request_ratelimited() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) @@ -342,7 +343,7 @@ fn request_ratelimited() { #[test] #[serial] -fn request_not_ratelimited() { +fn llm_gateway_request_not_ratelimited() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -388,17 +389,10 @@ fn request_not_ratelimited() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) - // .expect_metric_increment("active_http_calls", 1) - .expect_send_local_response( - Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), - None, - None, - None, - ) - .expect_metric_increment("ratelimited_rq", 1) + .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) 
.unwrap(); } diff --git a/crates/prompt_gateway/src/filter_context.rs b/crates/prompt_gateway/src/filter_context.rs index 3f1d3f0d..de120369 100644 --- a/crates/prompt_gateway/src/filter_context.rs +++ b/crates/prompt_gateway/src/filter_context.rs @@ -243,7 +243,7 @@ impl RootContext for FilterContext { self.overrides = Rc::new(config.overrides); let mut prompt_targets = HashMap::new(); - for pt in config.prompt_targets { + for pt in config.prompt_targets.unwrap_or_default() { prompt_targets.insert(pt.name.clone(), pt.clone()); } self.system_prompt = Rc::new(config.system_prompt); diff --git a/crates/prompt_gateway/src/hallucination.rs b/crates/prompt_gateway/src/hallucination.rs index c4425957..130f8723 100644 --- a/crates/prompt_gateway/src/hallucination.rs +++ b/crates/prompt_gateway/src/hallucination.rs @@ -1,9 +1,9 @@ use common::{ common_types::open_ai::Message, - consts::{ARCH_MODEL_PREFIX, USER_ROLE, HALLUCINATION_TEMPLATE}, + consts::{ARCH_MODEL_PREFIX, HALLUCINATION_TEMPLATE, USER_ROLE}, }; -pub fn extract_messages_for_hallucination(messages: &Vec) -> Vec { +pub fn extract_messages_for_hallucination(messages: &[Message]) -> Vec { let mut arch_assistant = false; let mut user_messages = Vec::new(); if messages.len() >= 2 { @@ -18,11 +18,11 @@ pub fn extract_messages_for_hallucination(messages: &Vec) -> Vec) -> Vec archgw: {}", String::from_utf8_lossy(&body_bytes)); + debug!( + "developer => archgw: {}", + String::from_utf8_lossy(&body_bytes) + ); // Deserialize body into spec. // Currently OpenAI API. 
- let mut deserialized_body: ChatCompletionsRequest = - match serde_json::from_slice(&body_bytes) { - Ok(deserialized) => deserialized, - Err(e) => { - self.send_server_error( - ServerError::Deserialization(e), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; - } - }; + let deserialized_body: ChatCompletionsRequest = match serde_json::from_slice(&body_bytes) { + Ok(deserialized) => deserialized, + Err(e) => { + self.send_server_error( + ServerError::Deserialization(e), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }; self.arch_state = match deserialized_body.metadata { Some(ref metadata) => { @@ -110,11 +121,6 @@ impl HttpContext for StreamContext { }; self.streaming_response = deserialized_body.stream; - if deserialized_body.stream && deserialized_body.stream_options.is_none() { - deserialized_body.stream_options = Some(StreamOptions { - include_usage: true, - }); - } let last_user_prompt = match deserialized_body .messages @@ -235,105 +241,111 @@ impl HttpContext for StreamContext { ); if !self.is_chat_completions_request { - if let Some(body_str) = self - .get_http_response_body(0, body_size) - .and_then(|bytes| String::from_utf8(bytes).ok()) - { - debug!("recv [S={}] body_str={}", self.context_id, body_str); - } + debug!("non-streaming request"); return Action::Continue; } - if !end_of_stream { - return Action::Pause; - } + let body = if self.streaming_response { + let streaming_chunk = match self.get_http_response_body(0, body_size) { + Some(chunk) => chunk, + None => { + warn!( + "response body empty, chunk_start: {}, chunk_size: {}", + 0, body_size + ); + return Action::Continue; + } + }; - let body = self - .get_http_response_body(0, body_size) - .expect("cant get response body"); + if streaming_chunk.len() != body_size { + warn!( + "chunk size mismatch: read: {} != requested: {}", + streaming_chunk.len(), + body_size + ); + } + + streaming_chunk + } else { + debug!("non streaming response bytes read: 0:{}", body_size); + 
match self.get_http_response_body(0, body_size) { + Some(body) => body, + None => { + warn!("non streaming response body empty"); + return Action::Continue; + } + } + }; + + let body_utf8 = match String::from_utf8(body) { + Ok(body_utf8) => body_utf8, + Err(e) => { + debug!("could not convert to utf8: {}", e); + return Action::Continue; + } + }; if self.streaming_response { trace!("streaming response"); - } else { - trace!("non streaming response"); - let chat_completions_response: ChatCompletionsResponse = - match serde_json::from_slice(&body) { - Ok(de) => de, - Err(e) => { - trace!( - "invalid response: {}, {}", - String::from_utf8_lossy(&body), - e - ); - return Action::Continue; - } - }; - if chat_completions_response.usage.is_some() { - self.response_tokens += chat_completions_response - .usage - .as_ref() - .unwrap() - .completion_tokens; + if self.tool_calls.is_some() && !self.tool_calls.as_ref().unwrap().is_empty() { + let chunks = vec![ + ChatCompletionStreamResponse::new( + None, + Some(ASSISTANT_ROLE.to_string()), + Some(ARCH_FC_MODEL_NAME.to_string()), + self.tool_calls.to_owned(), + ), + ChatCompletionStreamResponse::new( + self.tool_call_response.clone(), + Some(TOOL_ROLE.to_string()), + Some(ARCH_FC_MODEL_NAME.to_string()), + None, + ), + ]; + + let mut response_str = to_server_events(chunks); + // append the original response from the model to the stream + response_str.push_str(&body_utf8); + self.set_http_response_body(0, body_size, response_str.as_bytes()); + self.tool_calls = None; } + } else if let Some(tool_calls) = self.tool_calls.as_ref() { + if !tool_calls.is_empty() { + if self.arch_state.is_none() { + self.arch_state = Some(Vec::new()); + } - if let Some(tool_calls) = self.tool_calls.as_ref() { - if !tool_calls.is_empty() { - if self.arch_state.is_none() { - self.arch_state = Some(Vec::new()); + let mut data = serde_json::from_str(&body_utf8).unwrap(); + // use serde::Value to manipulate the json object and ensure that we don't lose any 
data + if let Value::Object(ref mut map) = data { + // serialize arch state and add to metadata + let metadata = map + .entry("metadata") + .or_insert(Value::Object(serde_json::Map::new())); + if metadata == &Value::Null { + *metadata = Value::Object(serde_json::Map::new()); } - let mut data = serde_json::from_slice(&body).unwrap(); - // use serde::Value to manipulate the json object and ensure that we don't lose any data - if let Value::Object(ref mut map) = data { - // serialize arch state and add to metadata - let metadata = map - .entry("metadata") - .or_insert(Value::Object(serde_json::Map::new())); - if metadata == &Value::Null { - *metadata = Value::Object(serde_json::Map::new()); - } - - // since arch gateway generates tool calls (using arch-fc) and calls upstream api to - // get response, we will send these back to developer so they can see the api response - // and tool call arch-fc generated - let fc_messages = vec![ - Message { - role: ASSISTANT_ROLE.to_string(), - content: None, - model: Some(ARCH_FC_MODEL_NAME.to_string()), - tool_calls: self.tool_calls.clone(), - tool_call_id: None, - }, - Message { - role: TOOL_ROLE.to_string(), - content: self.tool_call_response.clone(), - model: None, - tool_calls: None, - tool_call_id: Some(self.tool_calls.as_ref().unwrap()[0].id.clone()), - }, - ]; - let fc_messages_str = serde_json::to_string(&fc_messages).unwrap(); - let arch_state = HashMap::from([("messages".to_string(), fc_messages_str)]); - let arch_state_str = serde_json::to_string(&arch_state).unwrap(); - metadata.as_object_mut().unwrap().insert( - ARCH_STATE_HEADER.to_string(), - serde_json::Value::String(arch_state_str), - ); - let data_serialized = serde_json::to_string(&data).unwrap(); - debug!("archgw <= developer: {}", data_serialized); - self.set_http_response_body(0, body_size, data_serialized.as_bytes()); - }; - } + let fc_messages = vec![ + self.generate_toll_call_message(), + self.generate_api_response_message(), + ]; + let fc_messages_str = 
serde_json::to_string(&fc_messages).unwrap(); + let arch_state = HashMap::from([("messages".to_string(), fc_messages_str)]); + let arch_state_str = serde_json::to_string(&arch_state).unwrap(); + metadata.as_object_mut().unwrap().insert( + ARCH_STATE_HEADER.to_string(), + serde_json::Value::String(arch_state_str), + ); + let data_serialized = serde_json::to_string(&data).unwrap(); + debug!("archgw <= developer: {}", data_serialized); + self.set_http_response_body(0, body_size, data_serialized.as_bytes()); + }; } } - trace!( - "recv [S={}] total_tokens={} end_stream={}", - self.context_id, - self.response_tokens, - end_of_stream - ); + trace!("recv [S={}] end_stream={}", self.context_id, end_of_stream); Action::Continue } diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 6f4a36ea..5d79d181 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -2,9 +2,9 @@ use crate::filter_context::{EmbeddingsStore, WasmMetrics}; use crate::hallucination::extract_messages_for_hallucination; use acap::cos; use common::common_types::open_ai::{ - ArchState, ChatCompletionTool, ChatCompletionsRequest, ChatCompletionsResponse, Choice, - FunctionDefinition, FunctionParameter, FunctionParameters, Message, ParameterType, ToolCall, - ToolType, + to_server_events, ArchState, ChatCompletionStreamResponse, ChatCompletionTool, + ChatCompletionsRequest, ChatCompletionsResponse, FunctionDefinition, FunctionParameter, + FunctionParameters, Message, ParameterType, ToolCall, ToolType, }; use common::common_types::{ EmbeddingType, HallucinationClassificationRequest, HallucinationClassificationResponse, @@ -12,7 +12,12 @@ use common::common_types::{ }; use common::configuration::{Overrides, PromptGuards, PromptTarget}; use common::consts::{ - ARCH_FC_INTERNAL_HOST, ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME, MESSAGES_KEY, ARCH_MODEL_PREFIX, ARCH_STATE_HEADER, 
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_EMBEDDING_MODEL, HALLUCINATION_TEMPLATE, DEFAULT_HALLUCINATED_THRESHOLD, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, EMBEDDINGS_INTERNAL_HOST, HALLUCINATION_INTERNAL_HOST, REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, USER_ROLE, ZEROSHOT_INTERNAL_HOST + ARCH_FC_INTERNAL_HOST, ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, + ARCH_INTERNAL_CLUSTER_NAME, ARCH_MODEL_PREFIX, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, + ASSISTANT_ROLE, DEFAULT_EMBEDDING_MODEL, DEFAULT_HALLUCINATED_THRESHOLD, DEFAULT_INTENT_MODEL, + DEFAULT_PROMPT_TARGET_THRESHOLD, EMBEDDINGS_INTERNAL_HOST, HALLUCINATION_INTERNAL_HOST, + HALLUCINATION_TEMPLATE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, USER_ROLE, + ZEROSHOT_INTERNAL_HOST, }; use common::embeddings::{ CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, @@ -57,7 +62,7 @@ pub struct StreamCallContext { pub struct StreamContext { system_prompt: Rc>, prompt_targets: Rc>, - embeddings_store: Option>, + pub embeddings_store: Option>, overrides: Rc>, pub metrics: Rc, pub callouts: RefCell>, @@ -66,9 +71,8 @@ pub struct StreamContext { pub tool_call_response: Option, pub arch_state: Option>, pub request_body_size: usize, - pub streaming_response: bool, pub user_prompt: Option, - pub response_tokens: usize, + pub streaming_response: bool, pub is_chat_completions_request: bool, pub chat_completions_request: Option, pub prompt_guards: Rc, @@ -99,7 +103,6 @@ impl StreamContext { request_body_size: 0, streaming_response: false, user_prompt: None, - response_tokens: 0, is_chat_completions_request: false, prompt_guards, overrides, @@ -300,13 +303,17 @@ impl StreamContext { body: Vec, callout_context: StreamCallContext, ) { - let boyd_str = String::from_utf8(body).expect("could not convert body to string"); - debug!("archgw <= hallucination response: {}", boyd_str); + let body_str = String::from_utf8(body).expect("could not convert body to string"); + 
debug!("archgw <= hallucination response: {}", body_str); let hallucination_response: HallucinationClassificationResponse = - match serde_json::from_str(boyd_str.as_str()) { + match serde_json::from_str(body_str.as_str()) { Ok(hallucination_response) => hallucination_response, Err(e) => { - warn!("error deserializing hallucination response: {}", e); + warn!( + "error deserializing hallucination response: {}, body: {}", + e, + body_str.as_str() + ); return self.send_server_error(ServerError::Deserialization(e), None); } }; @@ -323,37 +330,36 @@ impl StreamContext { if !keys_with_low_score.is_empty() { let response = - HALLUCINATION_TEMPLATE.to_string() - + &keys_with_low_score.join(", ") - + " ?"; - let message = Message { - role: ASSISTANT_ROLE.to_string(), - content: Some(response), - model: Some(ARCH_FC_MODEL_NAME.to_string()), - tool_calls: None, - tool_call_id: None, - }; + HALLUCINATION_TEMPLATE.to_string() + &keys_with_low_score.join(", ") + " ?"; - let chat_completion_response = ChatCompletionsResponse { - choices: vec![Choice { - message, - index: 0, - finish_reason: "done".to_string(), - }], - usage: None, - model: ARCH_FC_MODEL_NAME.to_string(), - metadata: None, - }; + let response_str = if self.streaming_response { + let chunks = vec![ + ChatCompletionStreamResponse::new( + None, + Some(ASSISTANT_ROLE.to_string()), + Some(ARCH_FC_MODEL_NAME.to_owned()), + None, + ), + ChatCompletionStreamResponse::new( + Some(response), + None, + Some(ARCH_FC_MODEL_NAME.to_owned()), + None, + ), + ]; - trace!("hallucination response: {:?}", chat_completion_response); + to_server_events(chunks) + } else { + let chat_completion_response = ChatCompletionsResponse::new(response); + serde_json::to_string(&chat_completion_response).unwrap() + }; + debug!("hallucination response: {:?}", response_str); + // make sure on_http_response_body does not attach tool calls and tool response to the response + self.tool_calls = None; self.send_http_response( 
StatusCode::OK.as_u16().into(), vec![("Powered-By", "Katanemo")], - Some( - serde_json::to_string(&chat_completion_response) - .unwrap() - .as_bytes(), - ), + Some(response_str.as_bytes()), ); } else { // not a hallucination, resume the flow @@ -629,6 +635,7 @@ impl StreamContext { .message .tool_calls .clone_into(&mut self.tool_calls); + if self.tool_calls.as_ref().unwrap().len() > 1 { warn!( "multiple tool calls not supported yet, tool_calls count found: {}", @@ -643,10 +650,39 @@ impl StreamContext { //TODO: add resolver name to the response so the client can send the response back to the correct resolver + let direct_response_str = if self.streaming_response { + let chunks = vec![ + ChatCompletionStreamResponse::new( + None, + Some(ASSISTANT_ROLE.to_string()), + Some(ARCH_FC_MODEL_NAME.to_owned()), + None, + ), + ChatCompletionStreamResponse::new( + Some( + arch_fc_response.choices[0] + .message + .content + .as_ref() + .unwrap() + .clone(), + ), + None, + Some(ARCH_FC_MODEL_NAME.to_owned()), + None, + ), + ]; + + to_server_events(chunks) + } else { + body_str + }; + + self.tool_calls = None; return self.send_http_response( StatusCode::OK.as_u16().into(), vec![("Powered-By", "Katanemo")], - Some(body_str.as_bytes()), + Some(direct_response_str.as_bytes()), ); } @@ -943,7 +979,7 @@ impl StreamContext { self.get_embeddings(callout_context); } - pub fn default_target_handler(&self, body: Vec, callout_context: StreamCallContext) { + pub fn default_target_handler(&self, body: Vec, mut callout_context: StreamCallContext) { let prompt_target = self .prompt_targets .get(callout_context.prompt_target_name.as_ref().unwrap()) @@ -951,8 +987,34 @@ impl StreamContext { .clone(); // check if the default target should be dispatched to the LLM provider - if !prompt_target.auto_llm_dispatch_on_response.unwrap_or(false) { - let default_target_response_str = String::from_utf8(body).unwrap(); + if !prompt_target + .auto_llm_dispatch_on_response + .unwrap_or_default() + { + let 
default_target_response_str = if self.streaming_response { + let chat_completion_response = + serde_json::from_slice::(&body).unwrap(); + + let chunks = vec![ + ChatCompletionStreamResponse::new( + None, + Some(ASSISTANT_ROLE.to_string()), + Some(chat_completion_response.model.clone()), + None, + ), + ChatCompletionStreamResponse::new( + chat_completion_response.choices[0].message.content.clone(), + None, + Some(chat_completion_response.model.clone()), + None, + ), + ]; + + to_server_events(chunks) + } else { + String::from_utf8(body).unwrap() + }; + self.send_http_response( StatusCode::OK.as_u16().into(), vec![("Powered-By", "Katanemo")], @@ -960,20 +1022,20 @@ impl StreamContext { ); return; } + let chat_completions_resp: ChatCompletionsResponse = match serde_json::from_slice(&body) { Ok(chat_completions_resp) => chat_completions_resp, Err(e) => { - warn!("error deserializing default target response: {}", e); + warn!( + "error deserializing default target response: {}, body str: {}", + e, + String::from_utf8(body).unwrap() + ); return self.send_server_error(ServerError::Deserialization(e), None); } }; - let api_resp = chat_completions_resp.choices[0] - .message - .content - .as_ref() - .unwrap(); - let mut messages = callout_context.request_body.messages; + let mut messages = Vec::new(); // add system prompt match prompt_target.system_prompt.as_ref() { None => {} @@ -989,13 +1051,24 @@ impl StreamContext { } } + messages.append(&mut callout_context.request_body.messages); + + let api_resp = chat_completions_resp.choices[0] + .message + .content + .as_ref() + .unwrap(); + + let user_message = messages.pop().unwrap(); + let message = format!("{}\ncontext: {}", user_message.content.unwrap(), api_resp); messages.push(Message { role: USER_ROLE.to_string(), - content: Some(api_resp.clone()), + content: Some(message), model: None, tool_calls: None, tool_call_id: None, }); + let chat_completion_request = ChatCompletionsRequest { model: self .chat_completions_request @@ 
-1009,11 +1082,32 @@ impl StreamContext { stream_options: callout_context.request_body.stream_options, metadata: None, }; + let json_resp = serde_json::to_string(&chat_completion_request).unwrap(); debug!("archgw => (default target) llm request: {}", json_resp); self.set_http_request_body(0, self.request_body_size, json_resp.as_bytes()); self.resume_http_request(); } + + pub fn generate_toll_call_message(&mut self) -> Message { + Message { + role: ASSISTANT_ROLE.to_string(), + content: None, + model: Some(ARCH_FC_MODEL_NAME.to_string()), + tool_calls: self.tool_calls.clone(), + tool_call_id: None, + } + } + + pub fn generate_api_response_message(&mut self) -> Message { + Message { + role: TOOL_ROLE.to_string(), + content: self.tool_call_response.clone(), + model: None, + tool_calls: None, + tool_call_id: Some(self.tool_calls.as_ref().unwrap()[0].id.clone()), + } + } } impl Client for StreamContext { diff --git a/crates/prompt_gateway/tests/integration.rs b/crates/prompt_gateway/tests/integration.rs index 27eac427..1bf581c5 100644 --- a/crates/prompt_gateway/tests/integration.rs +++ b/crates/prompt_gateway/tests/integration.rs @@ -375,7 +375,7 @@ ratelimits: #[test] #[serial] -fn successful_request_to_open_ai_chat_completions() { +fn prompt_gateway_successful_request_to_open_ai_chat_completions() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -438,7 +438,7 @@ fn successful_request_to_open_ai_chat_completions() { #[test] #[serial] -fn bad_request_to_open_ai_chat_completions() { +fn prompt_gateway_bad_request_to_open_ai_chat_completions() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -501,7 +501,7 @@ fn bad_request_to_open_ai_chat_completions() { #[test] #[serial] -fn request_to_llm_gateway() { +fn prompt_gateway_request_to_llm_gateway() { let args = tester::MockSettings { wasm_path: wasm_module(), quiet: false, @@ -669,8 +669,8 @@ fn request_to_llm_gateway() { 
.expect_get_buffer_bytes(Some(BufferType::HttpResponseBody)) .returning(Some(chat_completion_response_str.as_str())) .expect_log(Some(LogLevel::Trace), None) + .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpResponseBody), None) - .expect_log(Some(LogLevel::Trace), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Trace), None) .execute_and_expect(ReturnType::Action(Action::Continue)) diff --git a/demos/function_calling/api_server/app/main.py b/demos/function_calling/api_server/app/main.py index 041a921d..e87a3a21 100644 --- a/demos/function_calling/api_server/app/main.py +++ b/demos/function_calling/api_server/app/main.py @@ -66,18 +66,18 @@ async def insurance_claim_details(req: InsuranceClaimDetailsRequest, res: Respon class DefaultTargetRequest(BaseModel): - arch_messages: list + messages: list @app.post("/default_target") async def default_target(req: DefaultTargetRequest, res: Response): - logger.info(f"Received arch_messages: {req.arch_messages}") + logger.info(f"Received arch_messages: {req.messages}") resp = { "choices": [ { "message": { "role": "assistant", - "content": "hello world from api server", + "content": "I can help you with weather forecast or insurance claim details", }, "finish_reason": "completed", "index": 0, diff --git a/demos/function_calling/arch_config.yaml b/demos/function_calling/arch_config.yaml index a2c92883..e7448c7e 100644 --- a/demos/function_calling/arch_config.yaml +++ b/demos/function_calling/arch_config.yaml @@ -16,12 +16,27 @@ overrides: prompt_target_intent_matching_threshold: 0.6 llm_providers: - - name: gpt - access_key: OPENAI_API_KEY + - name: gpt-4o-mini + access_key: $OPENAI_API_KEY provider: openai - model: gpt-3.5-turbo + model: gpt-4o-mini default: true + - name: gpt-3.5-turbo-0125 + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-3.5-turbo-0125 + + - name: gpt-4o + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-4o + + - name: 
ministral-3b + access_key: $MISTRAL_API_KEY + provider: mistral + model: ministral-3b-latest + system_prompt: | You are a helpful assistant. @@ -67,10 +82,10 @@ prompt_targets: name: api_server path: /default_target system_prompt: | - You are a helpful assistant. Use the information that is provided to you. + You are a helpful assistant! Summarize the user's request and provide a helpful response. # if it is set to false arch will send response that it received from this prompt target to the user # if true arch will forward the response to the default LLM - auto_llm_dispatch_on_response: true + auto_llm_dispatch_on_response: false tracing: random_sampling: 100 diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml index 379b2cf7..40a01743 100644 --- a/demos/function_calling/docker-compose.yaml +++ b/demos/function_calling/docker-compose.yaml @@ -13,11 +13,11 @@ services: chatbot_ui: build: context: ../../chatbot_ui - dockerfile: Dockerfile ports: - "18080:8080" environment: - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 #this is only because we are running the sample app in the same docker container environemtn as archgw + # this is only because we are running the sample app in the same docker container environemtn as archgw + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 extra_hosts: - "host.docker.internal:host-gateway" volumes: @@ -38,6 +38,8 @@ services: - "${PORT_UI:-55679}:55679" - "${PORT_GRPC:-4317}:4317" - "${PORT_HTTP:-4318}:4318" + profiles: + - monitoring prometheus: image: prom/prometheus diff --git a/demos/llm_routing/arch_config.yaml b/demos/llm_routing/arch_config.yaml new file mode 100644 index 00000000..620a1d10 --- /dev/null +++ b/demos/llm_routing/arch_config.yaml @@ -0,0 +1,32 @@ +version: "0.1-beta" + +listener: + address: 0.0.0.0 + port: 10000 + message_format: huggingface + connect_timeout: 0.005s + +llm_providers: + - name: gpt-4o-mini + access_key: 
$OPENAI_API_KEY + provider: openai + model: gpt-4o-mini + default: true + + - name: gpt-3.5-turbo-0125 + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-3.5-turbo-0125 + + - name: gpt-4o + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-4o + + - name: ministral-3b + access_key: $MISTRAL_API_KEY + provider: mistral + model: ministral-3b-latest + +tracing: + random_sampling: 100 diff --git a/demos/llm_routing/docker-compose.yaml b/demos/llm_routing/docker-compose.yaml new file mode 100644 index 00000000..1ce6963b --- /dev/null +++ b/demos/llm_routing/docker-compose.yaml @@ -0,0 +1,14 @@ +services: + + chatbot_ui: + build: + context: ../../chatbot_ui + dockerfile: Dockerfile + ports: + - "18080:8080" + environment: + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:12000/v1 + extra_hosts: + - "host.docker.internal:host-gateway" + volumes: + - ./arch_config.yaml:/app/arch_config.yaml diff --git a/e2e_tests/.vscode/settings.json b/e2e_tests/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/e2e_tests/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/e2e_tests/README.md b/e2e_tests/README.md new file mode 100644 index 00000000..368fdd13 --- /dev/null +++ b/e2e_tests/README.md @@ -0,0 +1,34 @@ +# e2e tests + +e2e tests for arch llm gateway and prompt gateway + +To be able to run e2e tests successfully run_e2e_script prepares environment in following way, + +1. build and start function_calling demo (using docker compose) +1. build, install and start model server async (using poetry) +1. build and start arch gateway (using docker compose) +1. wait for model server to be ready +1. wait for arch gateway to be ready +1. start e2e tests (using poetry) + 1. runs llm gateway tests for llm routing + 2. 
runs prompt gateway tests to test function calling, parameter gathering and summarization +2. cleanup + 1. stops arch gateway + 2. stops model server + 3. stops function_calling demo + +## How to run + +To run locally make sure that following requirements are met. + +### Requirements + +- Python 3.10 +- Poetry +- Docker + +### Running tests locally + +```sh +sh run_e2e_test.sh +``` diff --git a/e2e_tests/common.py b/e2e_tests/common.py new file mode 100644 index 00000000..7ccee7c4 --- /dev/null +++ b/e2e_tests/common.py @@ -0,0 +1,42 @@ +import json +import os + + +PROMPT_GATEWAY_ENDPOINT = os.getenv( + "PROMPT_GATEWAY_ENDPOINT", "http://localhost:10000/v1/chat/completions" +) +LLM_GATEWAY_ENDPOINT = os.getenv( + "LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions" +) +ARCH_STATE_HEADER = "x-arch-state" + + +def get_data_chunks(stream, n=1): + chunks = [] + for chunk in stream.iter_lines(): + if chunk: + chunk = chunk.decode("utf-8") + chunk_data_id = chunk[0:6] + assert chunk_data_id == "data: " + chunk_data = chunk[6:] + chunk_data = chunk_data.strip() + chunks.append(chunk_data) + if len(chunks) >= n: + break + return chunks + + +def get_arch_messages(response_json): + arch_messages = [] + if response_json and "metadata" in response_json: + # load arch_state from metadata + arch_state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}") + # parse arch_state into json object + arch_state = json.loads(arch_state_str) + # load messages from arch_state + arch_messages_str = arch_state.get("messages", "[]") + # parse messages into json object + arch_messages = json.loads(arch_messages_str) + # append messages from arch gateway to history + return arch_messages + return [] diff --git a/e2e_tests/common_scripts.sh b/e2e_tests/common_scripts.sh new file mode 100644 index 00000000..90d31603 --- /dev/null +++ b/e2e_tests/common_scripts.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +log() { + timestamp=$(date +"%Y-%m-%d %H:%M:%S") + message="$*" + echo 
"$timestamp: $message" +} + +wait_for_healthz() { + local healthz_url="$1" + local timeout_seconds="${2:-30}" # Default timeout of 30 seconds + local sleep_between="${3:-5}" # Default sleep of 5 seconds + + local start_time=$(date +%s) + + while true; do + local response_code=$(curl -s -o /dev/null -w "%{http_code}" "$healthz_url") + + log "Healthz endpoint $healthz_url response code: $response_code" + if [[ "$response_code" -eq 200 ]]; then + log "Healthz endpoint is healthy. Proceeding..." + return 0 + fi + + local elapsed_time=$(( $(date +%s) - $start_time )) + if [[ $elapsed_time -ge $timeout_seconds ]]; then + log "Timeout reached. Healthz endpoint is still unhealthy. Exiting..." + return 1 + fi + + sleep $sleep_between + done +} diff --git a/e2e_tests/poetry.lock b/e2e_tests/poetry.lock new file mode 100644 index 00000000..68ebfcf5 --- /dev/null +++ b/e2e_tests/poetry.lock @@ -0,0 +1,702 @@ +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
+ +[[package]] +name = "attrs" +version = "24.2.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, + {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, +] + +[package.extras] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] + +[[package]] +name = "certifi" +version = "2024.8.30" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, +] + +[[package]] +name = "cffi" +version = "1.17.1" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file 
= "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.4.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"}, + {file = 
"charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, + {file = 
"charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"}, + {file = 
"charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"}, + {file = 
"charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"}, + 
{file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"}, + 
{file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"}, + {file = 
"charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, + {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.6.4" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "coverage-7.6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f8ae553cba74085db385d489c7a792ad66f7f9ba2ee85bfa508aeb84cf0ba07"}, + {file = "coverage-7.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8165b796df0bd42e10527a3f493c592ba494f16ef3c8b531288e3d0d72c1f6f0"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7c8b95bf47db6d19096a5e052ffca0a05f335bc63cef281a6e8fe864d450a72"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ed9281d1b52628e81393f5eaee24a45cbd64965f41857559c2b7ff19385df51"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0809082ee480bb8f7416507538243c8863ac74fd8a5d2485c46f0f7499f2b491"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d541423cdd416b78626b55f123412fcf979d22a2c39fce251b350de38c15c15b"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:58809e238a8a12a625c70450b48e8767cff9eb67c62e6154a642b21ddf79baea"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c9b8e184898ed014884ca84c70562b4a82cbc63b044d366fedc68bc2b2f3394a"}, + {file = "coverage-7.6.4-cp310-cp310-win32.whl", hash = "sha256:6bd818b7ea14bc6e1f06e241e8234508b21edf1b242d49831831a9450e2f35fa"}, + {file = "coverage-7.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:06babbb8f4e74b063dbaeb74ad68dfce9186c595a15f11f5d5683f748fa1d172"}, + {file = "coverage-7.6.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:73d2b73584446e66ee633eaad1a56aad577c077f46c35ca3283cd687b7715b0b"}, + {file = "coverage-7.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51b44306032045b383a7a8a2c13878de375117946d68dcb54308111f39775a25"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b3fb02fe73bed561fa12d279a417b432e5b50fe03e8d663d61b3d5990f29546"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed8fe9189d2beb6edc14d3ad19800626e1d9f2d975e436f84e19efb7fa19469b"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b369ead6527d025a0fe7bd3864e46dbee3aa8f652d48df6174f8d0bac9e26e0e"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ade3ca1e5f0ff46b678b66201f7ff477e8fa11fb537f3b55c3f0568fbfe6e718"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:27fb4a050aaf18772db513091c9c13f6cb94ed40eacdef8dad8411d92d9992db"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f704f0998911abf728a7783799444fcbbe8261c4a6c166f667937ae6a8aa522"}, + {file = "coverage-7.6.4-cp311-cp311-win32.whl", hash = "sha256:29155cd511ee058e260db648b6182c419422a0d2e9a4fa44501898cf918866cf"}, + {file = "coverage-7.6.4-cp311-cp311-win_amd64.whl", 
hash = "sha256:8902dd6a30173d4ef09954bfcb24b5d7b5190cf14a43170e386979651e09ba19"}, + {file = "coverage-7.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12394842a3a8affa3ba62b0d4ab7e9e210c5e366fbac3e8b2a68636fb19892c2"}, + {file = "coverage-7.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b6b4c83d8e8ea79f27ab80778c19bc037759aea298da4b56621f4474ffeb117"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d5b8007f81b88696d06f7df0cb9af0d3b835fe0c8dbf489bad70b45f0e45613"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b57b768feb866f44eeed9f46975f3d6406380275c5ddfe22f531a2bf187eda27"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5915fcdec0e54ee229926868e9b08586376cae1f5faa9bbaf8faf3561b393d52"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b58c672d14f16ed92a48db984612f5ce3836ae7d72cdd161001cc54512571f2"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2fdef0d83a2d08d69b1f2210a93c416d54e14d9eb398f6ab2f0a209433db19e1"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8cf717ee42012be8c0cb205dbbf18ffa9003c4cbf4ad078db47b95e10748eec5"}, + {file = "coverage-7.6.4-cp312-cp312-win32.whl", hash = "sha256:7bb92c539a624cf86296dd0c68cd5cc286c9eef2d0c3b8b192b604ce9de20a17"}, + {file = "coverage-7.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:1032e178b76a4e2b5b32e19d0fd0abbce4b58e77a1ca695820d10e491fa32b08"}, + {file = "coverage-7.6.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:023bf8ee3ec6d35af9c1c6ccc1d18fa69afa1cb29eaac57cb064dbb262a517f9"}, + {file = "coverage-7.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0ac3d42cb51c4b12df9c5f0dd2f13a4f24f01943627120ec4d293c9181219ba"}, + {file = 
"coverage-7.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8fe4984b431f8621ca53d9380901f62bfb54ff759a1348cd140490ada7b693c"}, + {file = "coverage-7.6.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fbd612f8a091954a0c8dd4c0b571b973487277d26476f8480bfa4b2a65b5d06"}, + {file = "coverage-7.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dacbc52de979f2823a819571f2e3a350a7e36b8cb7484cdb1e289bceaf35305f"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dab4d16dfef34b185032580e2f2f89253d302facba093d5fa9dbe04f569c4f4b"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:862264b12ebb65ad8d863d51f17758b1684560b66ab02770d4f0baf2ff75da21"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5beb1ee382ad32afe424097de57134175fea3faf847b9af002cc7895be4e2a5a"}, + {file = "coverage-7.6.4-cp313-cp313-win32.whl", hash = "sha256:bf20494da9653f6410213424f5f8ad0ed885e01f7e8e59811f572bdb20b8972e"}, + {file = "coverage-7.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:182e6cd5c040cec0a1c8d415a87b67ed01193ed9ad458ee427741c7d8513d963"}, + {file = "coverage-7.6.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a181e99301a0ae128493a24cfe5cfb5b488c4e0bf2f8702091473d033494d04f"}, + {file = "coverage-7.6.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:df57bdbeffe694e7842092c5e2e0bc80fff7f43379d465f932ef36f027179806"}, + {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bcd1069e710600e8e4cf27f65c90c7843fa8edfb4520fb0ccb88894cad08b11"}, + {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99b41d18e6b2a48ba949418db48159d7a2e81c5cc290fc934b7d2380515bd0e3"}, + {file = 
"coverage-7.6.4-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1e54712ba3474f34b7ef7a41e65bd9037ad47916ccb1cc78769bae324c01a"}, + {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53d202fd109416ce011578f321460795abfe10bb901b883cafd9b3ef851bacfc"}, + {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:c48167910a8f644671de9f2083a23630fbf7a1cb70ce939440cd3328e0919f70"}, + {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cc8ff50b50ce532de2fa7a7daae9dd12f0a699bfcd47f20945364e5c31799fef"}, + {file = "coverage-7.6.4-cp313-cp313t-win32.whl", hash = "sha256:b8d3a03d9bfcaf5b0141d07a88456bb6a4c3ce55c080712fec8418ef3610230e"}, + {file = "coverage-7.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:f3ddf056d3ebcf6ce47bdaf56142af51bb7fad09e4af310241e9db7a3a8022e1"}, + {file = "coverage-7.6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cb7fa111d21a6b55cbf633039f7bc2749e74932e3aa7cb7333f675a58a58bf3"}, + {file = "coverage-7.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11a223a14e91a4693d2d0755c7a043db43d96a7450b4f356d506c2562c48642c"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a413a096c4cbac202433c850ee43fa326d2e871b24554da8327b01632673a076"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00a1d69c112ff5149cabe60d2e2ee948752c975d95f1e1096742e6077affd376"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f76846299ba5c54d12c91d776d9605ae33f8ae2b9d1d3c3703cf2db1a67f2c0"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fe439416eb6380de434886b00c859304338f8b19f6f54811984f3420a2e03858"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:0294ca37f1ba500667b1aef631e48d875ced93ad5e06fa665a3295bdd1d95111"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6f01ba56b1c0e9d149f9ac85a2f999724895229eb36bd997b61e62999e9b0901"}, + {file = "coverage-7.6.4-cp39-cp39-win32.whl", hash = "sha256:bc66f0bf1d7730a17430a50163bb264ba9ded56739112368ba985ddaa9c3bd09"}, + {file = "coverage-7.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:c481b47f6b5845064c65a7bc78bc0860e635a9b055af0df46fdf1c58cebf8e8f"}, + {file = "coverage-7.6.4-pp39.pp310-none-any.whl", hash = "sha256:3c65d37f3a9ebb703e710befdc489a38683a5b152242664b973a7b7b22348a4e"}, + {file = "coverage-7.6.4.tar.gz", hash = "sha256:29fc0f17b1d3fea332f8001d4558f8214af7f1d87a345f3a133c901d60347c73"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "deepdiff" +version = "8.0.1" +description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "deepdiff-8.0.1-py3-none-any.whl", hash = "sha256:42e99004ce603f9a53934c634a57b04ad5900e0d8ed0abb15e635767489cbc05"}, + {file = "deepdiff-8.0.1.tar.gz", hash = "sha256:245599a4586ab59bb599ca3517a9c42f3318ff600ded5e80a3432693c8ec3c4b"}, +] + +[package.dependencies] +orderly-set = "5.2.2" + +[package.extras] +cli = ["click (==8.1.7)", "pyyaml (==6.0.1)"] +optimize = ["orjson"] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false 
+python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "orderly-set" +version = "5.2.2" +description = "Orderly set" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orderly_set-5.2.2-py3-none-any.whl", hash = "sha256:f7a37c95a38c01cdfe41c3ffb62925a318a2286ea0a41790c057fc802aec54da"}, + {file = "orderly_set-5.2.2.tar.gz", hash = "sha256:52a18b86aaf3f5d5a498bbdb27bf3253a4e5c57ab38e5b7a56fa00115cd28448"}, +] + +[[package]] +name = "outcome" +version = "1.3.0.post0" +description = "Capture the outcome of Python function calls." +optional = false +python-versions = ">=3.7" +files = [ + {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"}, + {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + +[[package]] +name = "packaging" +version = "24.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + 
+[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + +[[package]] +name = "pysocks" +version = "1.7.1" +description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, + {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, + {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, +] + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + 
+[[package]] +name = "pytest-cov" +version = "4.1.0" +description = "Pytest plugin for measuring coverage." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, + {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "pytest-sugar" +version = "1.0.0" +description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." +optional = false +python-versions = "*" +files = [ + {file = "pytest-sugar-1.0.0.tar.gz", hash = "sha256:6422e83258f5b0c04ce7c632176c7732cab5fdb909cb39cca5c9139f81276c0a"}, + {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"}, +] + +[package.dependencies] +packaging = ">=21.3" +pytest = ">=6.2.0" +termcolor = ">=2.1.0" + +[package.extras] +dev = ["black", "flake8", "pre-commit"] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "selenium" +version = "4.25.0" +description = "Official Python bindings for Selenium WebDriver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "selenium-4.25.0-py3-none-any.whl", hash = "sha256:3798d2d12b4a570bc5790163ba57fef10b2afee958bf1d80f2a3cf07c4141f33"}, + {file = "selenium-4.25.0.tar.gz", hash = "sha256:95d08d3b82fb353f3c474895154516604c7f0e6a9a565ae6498ef36c9bac6921"}, +] + +[package.dependencies] +certifi = ">=2021.10.8" +trio = ">=0.17,<1.0" +trio-websocket = ">=0.9,<1.0" +typing_extensions = ">=4.9,<5.0" +urllib3 = {version = ">=1.26,<3", extras = ["socks"]} +websocket-client = ">=1.8,<2.0" + +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = 
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + +[[package]] +name = "termcolor" +version = "2.5.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.9" +files = [ + {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"}, + {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + +[[package]] +name = "tomli" +version = "2.0.2" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, +] + +[[package]] +name = "trio" +version = "0.27.0" +description = "A friendly Python library for async concurrency and I/O" +optional = false +python-versions = ">=3.8" +files = [ + {file = "trio-0.27.0-py3-none-any.whl", hash = "sha256:68eabbcf8f457d925df62da780eff15ff5dc68fd6b367e2dde59f7aaf2a0b884"}, + {file = "trio-0.27.0.tar.gz", hash = "sha256:1dcc95ab1726b2da054afea8fd761af74bad79bd52381b84eae408e983c76831"}, +] + +[package.dependencies] +attrs = ">=23.2.0" +cffi = {version = ">=1.14", markers = "os_name == \"nt\" and implementation_name != \"pypy\""} +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +idna = "*" +outcome = "*" +sniffio = ">=1.3.0" +sortedcontainers = "*" + +[[package]] +name = "trio-websocket" +version = "0.11.1" +description = "WebSocket library for Trio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"}, + {file = "trio_websocket-0.11.1-py3-none-any.whl", hash = 
"sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"}, +] + +[package.dependencies] +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +trio = ">=0.11" +wsproto = ">=0.14" + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, + {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, +] + +[package.dependencies] +pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "websocket-client" +version = "1.8.0" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = 
["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "wsproto" +version = "1.2.0" +description = "WebSockets state-machine based protocol implementation" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"}, + {file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"}, +] + +[package.dependencies] +h11 = ">=0.9.0,<1" + +[metadata] +lock-version = "2.0" +python-versions = "^3.10" +content-hash = "6ae4fa6397091b87b63698201a08d7d97628ed65992d46514f118768b46b99ce" diff --git a/e2e_tests/pyproject.toml b/e2e_tests/pyproject.toml new file mode 100644 index 00000000..68724c18 --- /dev/null +++ b/e2e_tests/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "e2e_tests" +version = "0.0.1" +description = "e2e tests for prompt and llm gateway" +authors = ["Katanemo Labs, Inc "] +license = "Apache 2.0" +readme = "README.md" +package-mode = false + +[tool.poetry.dependencies] +python = "^3.10" +pytest = "^7.3.1" +requests = "^2.29.0" +selenium = "^4.11.2" +pytest-sugar = "^1.0.0" +deepdiff = "^8.0.1" + +[tool.poetry.dev-dependencies] +pytest-cov = "^4.1.0" + +[tool.pytest.ini_options] +python_files = ["test*.py"] +addopts = ["-v", "-s"] diff --git a/e2e_tests/run_e2e_tests.sh b/e2e_tests/run_e2e_tests.sh new file mode 100644 index 00000000..47f56459 --- /dev/null +++ b/e2e_tests/run_e2e_tests.sh @@ -0,0 +1,80 @@ +#/bin/bash +# if any of the commands fail, the script will exit +set -e + +. 
#!/bin/bash
# End-to-end test driver: builds and starts the function_calling demo, the
# model server, and the arch gateway, runs the pytest e2e suite, then tears
# everything down. Build noise goes to ../build.log; progress goes to stdout
# via the `log` helper from common_scripts.sh.
# If any command fails, the script exits (set -e) after dumping debug logs.
set -e

. ./common_scripts.sh

# Dump the tails of the model-server and docker build logs so CI failures
# are diagnosable without shelling into the runner.
print_debug() {
  log "Received signal to stop"
  log "Printing debug logs for model_server"
  log "===================================="
  tail -n 500 ~/archgw_logs/modelserver.log
  log "Printing debug logs for docker"
  log "====================================="
  tail -n 500 ../build.log
}

trap 'print_debug' INT TERM ERR

log starting > ../build.log

log building function_calling demo
log ==============================
cd ../demos/function_calling
docker compose build -q

log starting the function_calling demo
docker compose up -d
cd -

log building model server
log =====================
cd ../model_server
# NOTE: the original used `2>&1 >> ../build.log`, which redirects stderr to
# the terminal and only stdout to the log; `>> file 2>&1` captures both.
poetry install >> ../build.log 2>&1
log starting model server
log =====================
mkdir -p ~/archgw_logs
touch ~/archgw_logs/modelserver.log
poetry run archgw_modelserver restart &
# Stream the model-server log to CI output while we wait for it to come up.
tail -F ~/archgw_logs/modelserver.log &
model_server_tail_pid=$!
cd -

log building llm and prompt gateway rust modules
log ============================================
cd ../arch
docker build -f Dockerfile .. -t katanemo/archgw -q
log starting the arch gateway service
log =================================
docker compose -f docker-compose.e2e.yaml down
log waiting for model service to be healthy
wait_for_healthz "http://localhost:51000/healthz" 300
# Model server is healthy; stop mirroring its log to stdout.
kill "$model_server_tail_pid"
docker compose -f docker-compose.e2e.yaml up -d
log waiting for arch gateway service to be healthy
wait_for_healthz "http://localhost:10000/healthz" 60
cd -

log running e2e tests
log =================
poetry install >> ../build.log 2>&1
poetry run pytest

log shutting down the arch gateway service
log ======================================
cd ../arch
docker compose -f docker-compose.e2e.yaml stop >> ../build.log 2>&1
cd -

log shutting down the function_calling demo
log =======================================
cd ../demos/function_calling
docker compose down >> ../build.log 2>&1
cd -

log shutting down the model server
log ==============================
cd ../model_server
poetry run archgw_modelserver stop >> ../build.log 2>&1
cd -
import json

import pytest
import requests

from common import LLM_GATEWAY_ENDPOINT, get_data_chunks


# Exercise the LLM gateway's default model routing, and the explicit override
# via the x-arch-llm-provider-hint header, in both streaming and
# non-streaming modes.
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("provider_hint", [None, "gpt-3.5-turbo-0125"])
def test_hello_llm_gateway_llm(stream, provider_hint):
    """A plain "hello" should be answered by the hinted LLM, or the default one."""
    expected_llm = provider_hint if provider_hint is not None else "gpt-4o-mini-2024-07-18"

    payload = {
        "messages": [{"role": "user", "content": "hello"}],
        "stream": stream,
    }
    request_headers = {"x-arch-llm-provider-hint": provider_hint} if provider_hint else {}

    resp = requests.post(
        LLM_GATEWAY_ENDPOINT, json=payload, stream=stream, headers=request_headers
    )
    assert resp.status_code == 200

    if not stream:
        assert resp.json().get("model") == expected_llm
        return

    # Streaming: the model name is reported on every SSE chunk; check the first.
    chunks = get_data_chunks(resp)
    assert len(chunks) > 0
    first_chunk = json.loads(chunks[0])
    assert first_chunk.get("model") == expected_llm
import json

import pytest
import requests
from deepdiff import DeepDiff

from common import PROMPT_GATEWAY_ENDPOINT, get_arch_messages, get_data_chunks


def _delta_tool_calls(choice):
    """Return the tool_calls list from a streamed choice's delta.

    The gateway may emit ``"tool_calls": null`` (and the original
    ``.get("tool_calls", [])`` would then return None and crash ``len``),
    so missing and null are both normalized to an empty list.
    """
    return (choice.get("delta") or {}).get("tool_calls") or []


def _assert_streamed_agent_flow(chunks, expected_tool_call):
    """Verify the three-phase streamed agent flow.

    Chunk 0: Arch emits the tool call (role=assistant); chunk 1: the API call
    result (role=tool); chunks 2..end: the LLM summarization (role=assistant).
    """
    # first chunk is tool calls (role = assistant)
    first = json.loads(chunks[0])
    # .get("model", "") so a missing field fails the assert instead of
    # raising AttributeError on None.startswith
    assert first.get("model", "").startswith("Arch")
    choices = first.get("choices", [])
    assert len(choices) > 0
    assert choices[0]["delta"].get("role") == "assistant"
    tool_calls = _delta_tool_calls(choices[0])
    assert len(tool_calls) > 0
    diff = DeepDiff(tool_calls[0]["function"], expected_tool_call, ignore_string_case=True)
    assert not diff

    # second chunk is api call result (role = tool)
    second = json.loads(chunks[1])
    choices = second.get("choices", [])
    assert len(choices) > 0
    assert choices[0]["delta"].get("role") == "tool"

    # third..end chunk is summarization (role = assistant)
    third = json.loads(chunks[2])
    assert third.get("model", "").startswith("gpt-4o-mini")
    choices = third.get("choices", [])
    assert len(choices) > 0
    assert choices[0]["delta"].get("role") == "assistant"


def _assert_non_streamed_tool_flow(response_json, expected_tool_call):
    """Verify a non-streamed response: LLM summary plus arch_messages metadata
    carrying the tool call and the API response."""
    assert response_json.get("model", "").startswith("gpt-4o-mini")
    choices = response_json.get("choices", [])
    assert len(choices) > 0
    assert choices[0]["message"].get("role") == "assistant"
    # verify arch_messages (tool call and api response) sent as response metadata
    arch_messages = get_arch_messages(response_json)
    assert len(arch_messages) == 2
    tool_calls = arch_messages[0].get("tool_calls") or []
    assert len(tool_calls) > 0
    diff = DeepDiff(tool_calls[0]["function"], expected_tool_call, ignore_string_case=True)
    assert not diff


@pytest.mark.parametrize("stream", [True, False])
def test_prompt_gateway(stream):
    """Prompt with all required parameters: full tool-call + summarize flow."""
    expected_tool_call = {
        "name": "weather_forecast",
        "arguments": {"city": "seattle", "days": 10},
    }
    body = {
        "messages": [
            {
                "role": "user",
                "content": "how is the weather in seattle for next 10 days",
            }
        ],
        "stream": stream,
    }
    response = requests.post(PROMPT_GATEWAY_ENDPOINT, json=body, stream=stream)
    assert response.status_code == 200
    if stream:
        chunks = get_data_chunks(response, n=20)
        assert len(chunks) > 2
        _assert_streamed_agent_flow(chunks, expected_tool_call)
    else:
        _assert_non_streamed_tool_flow(response.json(), expected_tool_call)


@pytest.mark.parametrize("stream", [True, False])
def test_prompt_gateway_arch_direct_response(stream):
    """Prompt missing every tool parameter: Arch answers directly, no tool call."""
    body = {
        "messages": [
            {
                "role": "user",
                "content": "how is the weather",
            }
        ],
        "stream": stream,
    }
    response = requests.post(PROMPT_GATEWAY_ENDPOINT, json=body, stream=stream)
    assert response.status_code == 200
    if stream:
        chunks = get_data_chunks(response, n=3)
        assert len(chunks) > 0
        response_json = json.loads(chunks[0])
        # make sure arch responded directly
        assert response_json.get("model", "").startswith("Arch")
        # and tool call is null
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert len(_delta_tool_calls(choices[0])) == 0
    else:
        response_json = response.json()
        assert response_json.get("model", "").startswith("Arch")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        message = choices[0]["message"]["content"]
        # with no city at all, Arch should NOT be asking only for `days`
        assert "Could you provide the following details days" not in message


@pytest.mark.parametrize("stream", [True, False])
def test_prompt_gateway_param_gathering(stream):
    """Prompt with city but no days: Arch should ask for the missing parameter."""
    body = {
        "messages": [
            {
                "role": "user",
                "content": "how is the weather in seattle",
            }
        ],
        "stream": stream,
    }
    response = requests.post(PROMPT_GATEWAY_ENDPOINT, json=body, stream=stream)
    assert response.status_code == 200
    if stream:
        chunks = get_data_chunks(response, n=3)
        assert len(chunks) > 0
        response_json = json.loads(chunks[0])
        # make sure arch responded directly
        assert response_json.get("model", "").startswith("Arch")
        # and tool call is null
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        assert len(_delta_tool_calls(choices[0])) == 0
    else:
        response_json = response.json()
        assert response_json.get("model", "").startswith("Arch")
        choices = response_json.get("choices", [])
        assert len(choices) > 0
        message = choices[0]["message"]["content"]
        assert "Could you provide the following details days" in message


@pytest.mark.parametrize("stream", [True, False])
def test_prompt_gateway_param_tool_call(stream):
    """Follow-up turn supplies the missing `days`; the tool call then proceeds."""
    expected_tool_call = {
        "name": "weather_forecast",
        "arguments": {"city": "seattle", "days": 2},
    }
    body = {
        "messages": [
            {
                "role": "user",
                "content": "how is the weather in seattle",
            },
            {
                "role": "assistant",
                "content": "Could you provide the following details days ?",
                "model": "Arch-Function-1.5B",
            },
            {
                "role": "user",
                "content": "2 days",
            },
        ],
        "stream": stream,
    }
    response = requests.post(PROMPT_GATEWAY_ENDPOINT, json=body, stream=stream)
    assert response.status_code == 200
    if stream:
        chunks = get_data_chunks(response, n=20)
        assert len(chunks) > 2
        _assert_streamed_agent_flow(chunks, expected_tool_call)
    else:
        _assert_non_streamed_tool_flow(response.json(), expected_tool_call)


@pytest.mark.parametrize("stream", [True, False])
def test_prompt_gateway_default_target(stream):
    """Prompt matching no tool routes to the default target (api_server)."""
    body = {
        "messages": [
            {
                "role": "user",
                "content": "hello, what can you do for me?",
            },
        ],
        "stream": stream,
    }
    response = requests.post(PROMPT_GATEWAY_ENDPOINT, json=body, stream=stream)
    assert response.status_code == 200
    if stream:
        chunks = get_data_chunks(response, n=3)
        # we index chunks[1] below, so require at least two chunks
        # (original guard was `> 0`, an off-by-one)
        assert len(chunks) > 1
        first = json.loads(chunks[0])
        assert first.get("model", "").startswith("api_server")
        assert len(first.get("choices", [])) > 0
        assert first["choices"][0]["delta"]["role"] == "assistant"

        second = json.loads(chunks[1])
        choices = second.get("choices", [])
        assert len(choices) > 0
        content = choices[0]["delta"]["content"]
        assert (
            content == "I can help you with weather forecast or insurance claim details"
        )
    else:
        response_json = response.json()
        assert response_json.get("model", "").startswith("api_server")
        assert len(response_json.get("choices", [])) > 0
        assert response_json["choices"][0]["message"]["role"] == "assistant"
        assert (
            response_json["choices"][0]["message"]["content"]
            == "I can help you with weather forecast or insurance claim details"
        )