From 79a62fffe836676767da03afd1081022071da53b Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Mon, 21 Jul 2025 10:09:22 -0700
Subject: [PATCH 1/7] release 0.3.5 (#534)

---
 .github/workflows/e2e_archgw.yml              |  2 +-
 .../workflows/e2e_test_currency_convert.yml   |  2 +-
 .../e2e_test_preference_based_routing.yml     |  2 +-
 .github/workflows/validate_arch_config.yml    |  2 +-
 README.md                                     |  4 +-
 arch/tools/README.md                          |  2 +-
 arch/tools/cli/consts.py                      |  2 +-
 arch/tools/poetry.lock                        | 12 ++---
 arch/tools/pyproject.toml                     |  4 +-
 .../preference_based_routing/README.md        |  4 +-
 docs/source/conf.py                           |  2 +-
 docs/source/get_started/quickstart.rst        |  2 +-
 model_server/poetry.lock                      | 46 ++++++++++++-------
 model_server/pyproject.toml                   |  2 +-
 14 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml
index 9f0f1061..633a32d8 100644
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4 -t katanemo/archgw:latest
+          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5 -t katanemo/archgw:latest
 
       - name: start archgw
         env:
diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml
index 6a80760b..de29ed72 100644
--- a/.github/workflows/e2e_test_currency_convert.yml
+++ b/.github/workflows/e2e_test_currency_convert.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml
index e9564882..c1ab7050 100644
--- a/.github/workflows/e2e_test_preference_based_routing.yml
+++ b/.github/workflows/e2e_test_preference_based_routing.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml
index e5fd6acb..c3dbfeb3 100644
--- a/.github/workflows/validate_arch_config.yml
+++ b/.github/workflows/validate_arch_config.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.4
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
 
       - name: validate arch config
         run: |
diff --git a/README.md b/README.md
index d90fcd52..3f4199a2 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 ```console
 $ python3.12 -m venv venv
 $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-$ pip install archgw==0.3.4
+$ pip install archgw==0.3.5
 ```
 
 ### Build Agentic Apps with Arch Gateway
@@ -148,7 +148,7 @@ endpoints:
 ```sh
 
 $ archgw up arch_config.yaml
-2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.4
+2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.5
 2024-12-05 16:56:28,485 - cli.utils - INFO - Schema validation successful!
 2024-12-05 16:56:28,485 - cli.main - INFO - Starting arch model server and arch gateway
 2024-12-05 16:56:51,647 - cli.core - INFO - Container is healthy!
diff --git a/arch/tools/README.md b/arch/tools/README.md
index 7d098186..e4e45284 100644
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@@ -19,7 +19,7 @@ source venv/bin/activate
 
 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.4
+pip install archgw==0.3.5
 ```
 
 ## Uninstall Instructions: archgw CLI
diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py
index d0307211..9114f53f 100644
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server"
 SERVICE_ALL = "all"
 MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.4")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.5")
diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock
index d66eae75..239f825e 100644
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@@ -2,7 +2,7 @@
 
 [[package]]
 name = "archgw_modelserver"
-version = "0.3.4"
+version = "0.3.5"
 description = "A model server for serving models"
 optional = false
 python-versions = "*"
@@ -104,13 +104,13 @@ i18n = ["Babel (>=2.7)"]
 
 [[package]]
 name = "jsonschema"
-version = "4.24.0"
+version = "4.25.0"
 description = "An implementation of JSON Schema validation for Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"},
-    {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"},
+    {file = "jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716"},
+    {file = "jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f"},
 ]
 
 [package.dependencies]
@@ -121,7 +121,7 @@ rpds-py = ">=0.7.1"
 
 [package.extras]
 format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
-format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"]
+format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"]
 
 [[package]]
 name = "jsonschema-specifications"
@@ -576,4 +576,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "d1e256d1bb05fab98731edc11ea15391e9611acf2c3461d00d25c5fa0040de23"
+content-hash = "e86085ff732244cb68d2e3f7f4c2903f4a8a50cc7e0963324c2506f0de90df11"
diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml
index 179c36d5..cf75165c 100644
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.3.4"
+version = "0.3.5"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@@ -10,7 +10,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-archgw_modelserver = "^0.3.4"
+archgw_modelserver = "^0.3.5"
 click = "^8.1.7"
 jinja2 = "^3.1.4"
 jsonschema = "^4.23.0"
diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md
index 4031069d..7b988bcc 100644
--- a/demos/use_cases/preference_based_routing/README.md
+++ b/demos/use_cases/preference_based_routing/README.md
@@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
 2. start archgw in the foreground
 ```bash
 (venv) $ archgw up --service archgw --foreground
-2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.4
+2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.5
 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.4
+2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.5
 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4790156e..1f78ac50 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
 project = "Arch Docs"
 copyright = "2025, Katanemo Labs, Inc"
 author = "Katanemo Labs, Inc"
-release = " v0.3.4"
+release = " v0.3.5"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index bec37d90..67337e5b 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 
    $ python -m venv venv
    $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-   $ pip install archgw==0.3.4
+   $ pip install archgw==0.3.5
 
 
 Build AI Agent with Arch Gateway
diff --git a/model_server/poetry.lock b/model_server/poetry.lock
index 2debff66..01e10d8c 100644
--- a/model_server/poetry.lock
+++ b/model_server/poetry.lock
@@ -2,13 +2,13 @@
 
 [[package]]
 name = "accelerate"
-version = "1.8.1"
+version = "1.9.0"
 description = "Accelerate"
 optional = false
 python-versions = ">=3.9.0"
 files = [
-    {file = "accelerate-1.8.1-py3-none-any.whl", hash = "sha256:c47b8994498875a2b1286e945bd4d20e476956056c7941d512334f4eb44ff991"},
-    {file = "accelerate-1.8.1.tar.gz", hash = "sha256:f60df931671bc4e75077b852990469d4991ce8bd3a58e72375c3c95132034db9"},
+    {file = "accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1"},
+    {file = "accelerate-1.9.0.tar.gz", hash = "sha256:0e8c61f81af7bf37195b6175a545ed292617dd90563c88f49020aea5b6a0b47f"},
 ]
 
 [package.dependencies]
@@ -29,7 +29,7 @@ sagemaker = ["sagemaker"]
 test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"]
 test-fp8 = ["torchao"]
 test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist"]
-test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "swanlab", "tensorboard", "wandb"]
+test-trackers = ["comet-ml", "dvclive", "matplotlib", "mlflow", "swanlab", "tensorboard", "trackio", "wandb"]
 testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-order", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"]
 
 [[package]]
@@ -82,15 +82,26 @@ typing_extensions = {version = ">=4", markers = "python_version < \"3.11\""}
 [package.extras]
 tests = ["mypy (>=1.14.0)", "pytest", "pytest-asyncio"]
 
+[[package]]
+name = "backports-asyncio-runner"
+version = "1.2.0"
+description = "Backport of asyncio.Runner, a context manager that controls event loop life cycle."
+optional = false
+python-versions = "<3.11,>=3.8"
+files = [
+    {file = "backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5"},
+    {file = "backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162"},
+]
+
 [[package]]
 name = "certifi"
-version = "2025.7.9"
+version = "2025.7.14"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39"},
-    {file = "certifi-2025.7.9.tar.gz", hash = "sha256:c1d2ec05395148ee10cf672ffc28cd37ea0ab0d99f9cc74c43e588cbd111b079"},
+    {file = "certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2"},
+    {file = "certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995"},
 ]
 
 [[package]]
@@ -324,13 +335,13 @@ typing = ["typing-extensions (>=4.12.2)"]
 
 [[package]]
 name = "fsspec"
-version = "2025.5.1"
+version = "2025.7.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462"},
-    {file = "fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475"},
+    {file = "fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21"},
+    {file = "fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58"},
 ]
 
 [package.extras]
@@ -338,7 +349,7 @@ abfs = ["adlfs"]
 adl = ["adlfs"]
 arrow = ["pyarrow (>=1)"]
 dask = ["dask", "distributed"]
-dev = ["pre-commit", "ruff"]
+dev = ["pre-commit", "ruff (>=0.5)"]
 doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
 dropbox = ["dropbox", "dropboxdrivefs", "requests"]
 full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
@@ -1023,13 +1034,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.95.1"
+version = "1.97.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "openai-1.95.1-py3-none-any.whl", hash = "sha256:8bbdfeceef231b1ddfabbc232b179d79f8b849aab5a7da131178f8d10e0f162f"},
-    {file = "openai-1.95.1.tar.gz", hash = "sha256:f089b605282e2a2b6776090b4b46563ac1da77f56402a222597d591e2dcc1086"},
+    {file = "openai-1.97.0-py3-none-any.whl", hash = "sha256:a1c24d96f4609f3f7f51c9e1c2606d97cc6e334833438659cfd687e9c972c610"},
+    {file = "openai-1.97.0.tar.gz", hash = "sha256:0be349569ccaa4fb54f97bb808423fd29ccaeb1246ee1be762e0c81a47bae0aa"},
 ]
 
 [package.dependencies]
@@ -1498,16 +1509,17 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests
 
 [[package]]
 name = "pytest-asyncio"
-version = "1.0.0"
+version = "1.1.0"
 description = "Pytest support for asyncio"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3"},
-    {file = "pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f"},
+    {file = "pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf"},
+    {file = "pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea"},
 ]
 
 [package.dependencies]
+backports-asyncio-runner = {version = ">=1.1,<2", markers = "python_version < \"3.11\""}
 pytest = ">=8.2,<9"
 
 [package.extras]
diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml
index 76807337..f85016e6 100644
--- a/model_server/pyproject.toml
+++ b/model_server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw_modelserver"
-version = "0.3.4"
+version = "0.3.5"
 description = "A model server for serving models"
 authors = ["Katanemo Labs, Inc <info@katanemo.com>"]
 license = "Apache 2.0"

From d341f4365b1ef79355f5c184a571c5248b7515a0 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Mon, 21 Jul 2025 18:31:19 -0700
Subject: [PATCH 2/7] In request path use same format for usage preferences as
 arch_config (#533)

---
 crates/brightstaff/src/handlers/mod.rs        |   1 -
 .../brightstaff/src/handlers/preferences.rs   | 135 ------------------
 crates/brightstaff/src/main.rs                |   7 -
 .../brightstaff/src/router/router_model_v1.rs | 122 ++++++++++------
 crates/common/src/configuration.rs            |   5 +-
 5 files changed, 83 insertions(+), 187 deletions(-)
 delete mode 100644 crates/brightstaff/src/handlers/preferences.rs

diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs
index febab6c2..6de38b5b 100644
--- a/crates/brightstaff/src/handlers/mod.rs
+++ b/crates/brightstaff/src/handlers/mod.rs
@@ -1,3 +1,2 @@
 pub mod chat_completions;
 pub mod models;
-pub mod preferences;
diff --git a/crates/brightstaff/src/handlers/preferences.rs b/crates/brightstaff/src/handlers/preferences.rs
deleted file mode 100644
index a9c5a65d..00000000
--- a/crates/brightstaff/src/handlers/preferences.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-use bytes::Bytes;
-use common::configuration::{LlmProvider, ModelUsagePreference};
-use http_body_util::{combinators::BoxBody, BodyExt, Full};
-use hyper::{Request, Response, StatusCode};
-use serde_json;
-use std::{collections::HashMap, sync::Arc};
-use tracing::{info, warn};
-
-pub async fn list_preferences(
-    llm_providers: Arc<tokio::sync::RwLock<Vec<LlmProvider>>>,
-) -> Response<BoxBody<Bytes, hyper::Error>> {
-    let prov = llm_providers.read().await;
-    // convert the LlmProvider to UsageBasedProvider
-    let providers_with_usage = prov
-        .iter()
-        .map(|provider| ModelUsagePreference {
-            name: provider.name.clone(),
-            model: provider.model.clone().unwrap_or_default(),
-            usage: provider.usage.clone(),
-        })
-        .collect::<Vec<ModelUsagePreference>>();
-
-    match serde_json::to_string(&providers_with_usage) {
-        Ok(json) => {
-            let body = Full::new(Bytes::from(json))
-                .map_err(|never| match never {})
-                .boxed();
-            Response::builder()
-                .status(StatusCode::OK)
-                .header("Content-Type", "application/json")
-                .body(body)
-                .unwrap()
-        }
-        Err(_) => {
-            let body = Full::new(Bytes::from_static(
-                b"{\"error\":\"Failed to serialize models\"}",
-            ))
-            .map_err(|never| match never {})
-            .boxed();
-            Response::builder()
-                .status(StatusCode::INTERNAL_SERVER_ERROR)
-                .header("Content-Type", "application/json")
-                .body(body)
-                .unwrap()
-        }
-    }
-}
-
-pub async fn update_preferences(
-    request: Request<hyper::body::Incoming>,
-    llm_providers: Arc<tokio::sync::RwLock<Vec<LlmProvider>>>,
-) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
-    let request_body = request.collect().await?.to_bytes();
-
-    let usage: Vec<ModelUsagePreference> = match serde_json::from_slice(&request_body) {
-        Ok(usage) => usage,
-        Err(_) => {
-            let response_body = Full::new(Bytes::from_static(b"Invalid request body: "))
-                .map_err(|never| match never {})
-                .boxed();
-            return Ok(Response::builder()
-                .status(StatusCode::BAD_REQUEST)
-                .header("Content-Type", "text/plain")
-                .body(response_body)
-                .unwrap());
-        }
-    };
-
-    let usage_model_map: HashMap<String, ModelUsagePreference> =
-        usage.into_iter().map(|u| (u.model.clone(), u)).collect();
-
-    info!(
-        "Updating usage preferences for models: {:?}",
-        usage_model_map.keys()
-    );
-
-    let mut llm_providers = llm_providers.write().await;
-
-    // ensure that models coming in the request are valid
-    let llm_provider_names: Vec<String> = llm_providers
-        .iter()
-        .map(|provider| provider.name.clone())
-        .collect();
-
-    for model in usage_model_map.keys() {
-        if !llm_provider_names.contains(model) {
-            let model_not_found = format!("model not found: {}", model);
-            warn!("updating preferences: {}", model_not_found);
-            let response_body = Full::new(model_not_found.into())
-                .map_err(|never| match never {})
-                .boxed();
-            return Ok(Response::builder()
-                .status(StatusCode::BAD_REQUEST)
-                .header("Content-Type", "text/plain")
-                .body(response_body)
-                .unwrap());
-        }
-    }
-
-    let mut updated_models_list = Vec::new();
-    for provider in llm_providers.iter_mut() {
-        if let Some(usage_provider) = usage_model_map.get(&provider.name) {
-            provider.usage = usage_provider.usage.clone();
-            updated_models_list.push(ModelUsagePreference {
-                name: provider.name.clone(),
-                model: provider.model.clone().unwrap_or_default(),
-                usage: provider.usage.clone(),
-            });
-        }
-    }
-
-    if !updated_models_list.is_empty() {
-        // return list of updated models
-        let response_body = Full::new(Bytes::from(format!(
-            "{{\"updated_models\": {}}}",
-            serde_json::to_string(&updated_models_list).unwrap()
-        )))
-        .map_err(|never| match never {})
-        .boxed();
-        Ok(Response::builder()
-            .status(StatusCode::OK)
-            .header("Content-Type", "application/json")
-            .body(response_body)
-            .unwrap())
-    } else {
-        let response_body = Full::new(Bytes::from_static(b"Provider not found"))
-            .map_err(|never| match never {})
-            .boxed();
-        Ok(Response::builder()
-            .status(StatusCode::NOT_FOUND)
-            .header("Content-Type", "text/plain")
-            .body(response_body)
-            .unwrap())
-    }
-}
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index dbcc9124..b5bf0204 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -1,6 +1,5 @@
 use brightstaff::handlers::chat_completions::chat_completions;
 use brightstaff::handlers::models::list_models;
-use brightstaff::handlers::preferences::{list_preferences, update_preferences};
 use brightstaff::router::llm_router::RouterService;
 use brightstaff::utils::tracing::init_tracer;
 use bytes::Bytes;
@@ -116,12 +115,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             .with_context(parent_cx)
                             .await
                     }
-                    (&Method::GET, "/v1/router/preferences") => {
-                        Ok(list_preferences(llm_providers).await)
-                    }
-                    (&Method::PUT, "/v1/router/preferences") => {
-                        update_preferences(req, llm_providers).await
-                    }
                     (&Method::GET, "/v1/models") => Ok(list_models(llm_providers).await),
                     (&Method::OPTIONS, "/v1/models") => {
                         let mut response = Response::new(empty());
diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs
index dc0e1563..bd06b525 100644
--- a/crates/brightstaff/src/router/router_model_v1.rs
+++ b/crates/brightstaff/src/router/router_model_v1.rs
@@ -73,7 +73,7 @@ impl RouterModel for RouterModelV1 {
     fn generate_request(
         &self,
         messages: &[Message],
-        usage_preferences: &Option<Vec<ModelUsagePreference>>,
+        usage_preferences_from_request: &Option<Vec<ModelUsagePreference>>,
     ) -> ChatCompletionsRequest {
         // remove system prompt, tool calls, tool call response and messages without content
         // if content is empty its likely a tool call
@@ -150,31 +150,17 @@ impl RouterModel for RouterModelV1 {
             })
             .collect::<Vec<Message>>();
 
-        let llm_route_json = usage_preferences
-            .as_ref()
-            .map(|prefs| {
-                let llm_route: Vec<RoutingPreference> = prefs
-                    .iter()
-                    .map(|pref| RoutingPreference {
-                        name: pref.name.clone(),
-                        description: pref.usage.clone().unwrap_or_default(),
-                    })
-                    .collect();
-                serde_json::to_string(&llm_route).unwrap_or_default()
-            })
-            .unwrap_or_else(|| self.llm_route_json_str.clone());
-
-        let messages_content = ARCH_ROUTER_V1_SYSTEM_PROMPT
-            .replace("{routes}", &llm_route_json)
-            .replace(
-                "{conversation}",
-                &serde_json::to_string(&selected_conversation_list).unwrap_or_default(),
-            );
+        // Generate the router request message from the usage preferences.
+        // If preferences are passed in the request, use them; otherwise fall back to the router's default preferences.
+        let router_message = match convert_to_router_preferences(usage_preferences_from_request) {
+            Some(prefs) => generate_router_message(&prefs, &selected_conversation_list),
+            None => generate_router_message(&self.llm_route_json_str, &selected_conversation_list),
+        };
 
         ChatCompletionsRequest {
             model: self.routing_model.clone(),
             messages: vec![Message {
-                content: Some(ContentType::Text(messages_content)),
+                content: Some(ContentType::Text(router_message)),
                 role: USER_ROLE.to_string(),
             }],
             temperature: Some(0.01),
@@ -201,12 +187,18 @@ impl RouterModel for RouterModelV1 {
 
         if let Some(usage_preferences) = usage_preferences {
             // If usage preferences are defined, we need to find the model that matches the selected route
-            let matching_preference = usage_preferences
+            let model_name: Option<String> = usage_preferences
                 .iter()
-                .find(|pref| pref.name == selected_route);
+                .map(|pref| {
+                    pref.routing_preferences
+                        .iter()
+                        .find(|routing_pref| routing_pref.name == selected_route)
+                        .map(|_| pref.model.clone())
+                })
+                .find_map(|model| model);
 
-            if let Some(preference) = matching_preference {
-                return Ok(Some((selected_route, preference.model.clone())));
+            if let Some(model_name) = model_name {
+                return Ok(Some((selected_route, model_name)));
             } else {
                 warn!(
                     "No matching model found for route: {}, usage preferences: {:?}",
@@ -216,7 +208,7 @@ impl RouterModel for RouterModelV1 {
             }
         }
 
-        // If no usage preferences are defined, we return the route with the routing model
+        // If no usage preferences are passed in the request, use the default route-to-model mapping.
         if let Some(model) = self.llm_route_to_model_map.get(&selected_route).cloned() {
             return Ok(Some((selected_route, model)));
         }
@@ -234,6 +226,37 @@ impl RouterModel for RouterModelV1 {
     }
 }
 
+fn generate_router_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
+    ARCH_ROUTER_V1_SYSTEM_PROMPT
+        .replace("{routes}", prefs)
+        .replace(
+            "{conversation}",
+            &serde_json::to_string(&selected_conversation_list).unwrap_or_default(),
+        )
+}
+
+fn convert_to_router_preferences(
+    prefs_from_request: &Option<Vec<ModelUsagePreference>>,
+) -> Option<String> {
+    if let Some(usage_preferences) = prefs_from_request {
+        let routing_preferences = usage_preferences
+            .iter()
+            .flat_map(|pref| {
+                pref.routing_preferences
+                    .iter()
+                    .map(|routing_pref| RoutingPreference {
+                        name: routing_pref.name.clone(),
+                        description: routing_pref.description.clone(),
+                    })
+            })
+            .collect::<Vec<RoutingPreference>>();
+
+        return Some(serde_json::to_string(&routing_preferences).unwrap_or_default());
+    }
+
+    None
+}
+
 fn fix_json_response(body: &str) -> String {
     let mut updated_body = body.to_string();
 
@@ -299,7 +322,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
 
@@ -356,7 +380,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
 
@@ -379,9 +404,11 @@ Based on your analysis, provide your response in the following JSON formats if y
         let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
 
         let usage_preferences = Some(vec![ModelUsagePreference {
-            name: "code-generation".to_string(),
             model: "claude/claude-3-7-sonnet".to_string(),
-            usage: Some("generating new code snippets, functions, or boilerplate based on user prompts or requirements".to_string()),
+            routing_preferences: vec![RoutingPreference {
+                name: "code-generation".to_string(),
+                description: "generating new code snippets, functions, or boilerplate based on user prompts or requirements".to_string(),
+            }],
         }]);
         let req = router.generate_request(&conversation, &usage_preferences);
 
@@ -419,7 +446,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
 
@@ -478,7 +506,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
 
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
@@ -538,7 +567,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
 
@@ -604,7 +634,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
 
@@ -672,7 +703,8 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
         let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
 
@@ -747,14 +779,18 @@ Based on your analysis, provide your response in the following JSON formats if y
             ]
         }
         "#;
-        let llm_routes = serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
+        let llm_routes =
+            serde_json::from_str::<HashMap<String, Vec<RoutingPreference>>>(routes_str).unwrap();
 
         let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
 
         // Case 1: Valid JSON with non-empty route
         let input = r#"{"route": "Image generation"}"#;
         let result = router.parse_response(input, &None).unwrap();
-        assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string())));
+        assert_eq!(
+            result,
+            Some(("Image generation".to_string(), "gpt-4o".to_string()))
+        );
 
         // Case 2: Valid JSON with empty route
         let input = r#"{"route": ""}"#;
@@ -784,11 +820,17 @@ Based on your analysis, provide your response in the following JSON formats if y
         // Case 6: Single quotes and \n in JSON
         let input = "{'route': 'Image generation'}\\n";
         let result = router.parse_response(input, &None).unwrap();
-        assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string())));
+        assert_eq!(
+            result,
+            Some(("Image generation".to_string(), "gpt-4o".to_string()))
+        );
 
         // Case 7: Code block marker
         let input = "```json\n{\"route\": \"Image generation\"}\n```";
         let result = router.parse_response(input, &None).unwrap();
-        assert_eq!(result, Some(("Image generation".to_string(), "gpt-4o".to_string())));
+        assert_eq!(
+            result,
+            Some(("Image generation".to_string(), "gpt-4o".to_string()))
+        );
     }
 }
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 0693c09b..186691dc 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -1,6 +1,5 @@
 use hermesllm::providers::openai::types::{ModelDetail, ModelObject, Models};
 use serde::{Deserialize, Serialize};
-use serde_with::skip_serializing_none;
 use std::collections::HashMap;
 use std::fmt::Display;
 
@@ -178,12 +177,10 @@ impl Display for LlmProviderType {
     }
 }
 
-#[skip_serializing_none]
 #[derive(Serialize, Deserialize, Debug)]
 pub struct ModelUsagePreference {
-    pub name: String,
     pub model: String,
-    pub usage: Option<String>,
+    pub routing_preferences: Vec<RoutingPreference>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]

From 92a425facd2dc8d4f4c63f7165a31f167d151cde Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Tue, 22 Jul 2025 12:48:20 -0700
Subject: [PATCH 3/7] release 0.3.6 (#536)

---
 .github/workflows/e2e_archgw.yml                     |  2 +-
 .github/workflows/e2e_test_currency_convert.yml      |  2 +-
 .../workflows/e2e_test_preference_based_routing.yml  |  2 +-
 .github/workflows/validate_arch_config.yml           |  2 +-
 README.md                                            |  4 ++--
 arch/tools/README.md                                 |  2 +-
 arch/tools/cli/consts.py                             |  2 +-
 arch/tools/poetry.lock                               |  4 ++--
 arch/tools/pyproject.toml                            |  4 ++--
 demos/use_cases/preference_based_routing/README.md   |  4 ++--
 docs/source/conf.py                                  |  2 +-
 docs/source/get_started/quickstart.rst               |  2 +-
 model_server/poetry.lock                             | 12 ++++++------
 model_server/pyproject.toml                          |  2 +-
 14 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml
index 633a32d8..dccb196e 100644
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5 -t katanemo/archgw:latest
+          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6 -t katanemo/archgw:latest
 
       - name: start archgw
         env:
diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml
index de29ed72..3e0500f6 100644
--- a/.github/workflows/e2e_test_currency_convert.yml
+++ b/.github/workflows/e2e_test_currency_convert.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml
index c1ab7050..3719449a 100644
--- a/.github/workflows/e2e_test_preference_based_routing.yml
+++ b/.github/workflows/e2e_test_preference_based_routing.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml
index c3dbfeb3..e948e922 100644
--- a/.github/workflows/validate_arch_config.yml
+++ b/.github/workflows/validate_arch_config.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.5
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
 
       - name: validate arch config
         run: |
diff --git a/README.md b/README.md
index 3f4199a2..4848e9f8 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 ```console
 $ python3.12 -m venv venv
 $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-$ pip install archgw==0.3.5
+$ pip install archgw==0.3.6
 ```
 
 ### Build Agentic Apps with Arch Gateway
@@ -148,7 +148,7 @@ endpoints:
 ```sh
 
 $ archgw up arch_config.yaml
-2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.5
+2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.6
 2024-12-05 16:56:28,485 - cli.utils - INFO - Schema validation successful!
 2024-12-05 16:56:28,485 - cli.main - INFO - Starting arch model server and arch gateway
 2024-12-05 16:56:51,647 - cli.core - INFO - Container is healthy!
diff --git a/arch/tools/README.md b/arch/tools/README.md
index e4e45284..002e8939 100644
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@@ -19,7 +19,7 @@ source venv/bin/activate
 
 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.5
+pip install archgw==0.3.6
 ```
 
 ## Uninstall Instructions: archgw CLI
diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py
index 9114f53f..db15afb3 100644
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server"
 SERVICE_ALL = "all"
 MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.5")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.6")
diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock
index 239f825e..7b078369 100644
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@@ -2,7 +2,7 @@
 
 [[package]]
 name = "archgw_modelserver"
-version = "0.3.5"
+version = "0.3.6"
 description = "A model server for serving models"
 optional = false
 python-versions = "*"
@@ -576,4 +576,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "e86085ff732244cb68d2e3f7f4c2903f4a8a50cc7e0963324c2506f0de90df11"
+content-hash = "d66f922881475b01508e74b362e554f8aa667c805b038d62311c2d99eaaef08d"
diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml
index cf75165c..0d903608 100644
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.3.5"
+version = "0.3.6"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@@ -10,7 +10,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-archgw_modelserver = "^0.3.5"
+archgw_modelserver = "^0.3.6"
 click = "^8.1.7"
 jinja2 = "^3.1.4"
 jsonschema = "^4.23.0"
diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md
index 7b988bcc..3edd6854 100644
--- a/demos/use_cases/preference_based_routing/README.md
+++ b/demos/use_cases/preference_based_routing/README.md
@@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
 2. start archgw in the foreground
 ```bash
 (venv) $ archgw up --service archgw --foreground
-2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.5
+2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.6
 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.5
+2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.6
 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1f78ac50..449d136a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
 project = "Arch Docs"
 copyright = "2025, Katanemo Labs, Inc"
 author = "Katanemo Labs, Inc"
-release = " v0.3.5"
+release = " v0.3.6"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 67337e5b..7cd8308d 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 
    $ python -m venv venv
    $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-   $ pip install archgw==0.3.5
+   $ pip install archgw==0.3.6
 
 
 Build AI Agent with Arch Gateway
diff --git a/model_server/poetry.lock b/model_server/poetry.lock
index 01e10d8c..ae4ebdfb 100644
--- a/model_server/poetry.lock
+++ b/model_server/poetry.lock
@@ -1034,13 +1034,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.97.0"
+version = "1.97.1"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "openai-1.97.0-py3-none-any.whl", hash = "sha256:a1c24d96f4609f3f7f51c9e1c2606d97cc6e334833438659cfd687e9c972c610"},
-    {file = "openai-1.97.0.tar.gz", hash = "sha256:0be349569ccaa4fb54f97bb808423fd29ccaeb1246ee1be762e0c81a47bae0aa"},
+    {file = "openai-1.97.1-py3-none-any.whl", hash = "sha256:4e96bbdf672ec3d44968c9ea39d2c375891db1acc1794668d8149d5fa6000606"},
+    {file = "openai-1.97.1.tar.gz", hash = "sha256:a744b27ae624e3d4135225da9b1c89c107a2a7e5bc4c93e5b7b5214772ce7a4e"},
 ]
 
 [package.dependencies]
@@ -2031,13 +2031,13 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "4.53.2"
+version = "4.53.3"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.9.0"
 files = [
-    {file = "transformers-4.53.2-py3-none-any.whl", hash = "sha256:db8f4819bb34f000029c73c3c557e7d06fc1b8e612ec142eecdae3947a9c78bf"},
-    {file = "transformers-4.53.2.tar.gz", hash = "sha256:6c3ed95edfb1cba71c4245758f1b4878c93bf8cde77d076307dacb2cbbd72be2"},
+    {file = "transformers-4.53.3-py3-none-any.whl", hash = "sha256:5aba81c92095806b6baf12df35d756cf23b66c356975fb2a7fa9e536138d7c75"},
+    {file = "transformers-4.53.3.tar.gz", hash = "sha256:b2eda1a261de79b78b97f7888fe2005fc0c3fabf5dad33d52cc02983f9f675d8"},
 ]
 
 [package.dependencies]
diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml
index f85016e6..c36f1033 100644
--- a/model_server/pyproject.toml
+++ b/model_server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw_modelserver"
-version = "0.3.5"
+version = "0.3.6"
 description = "A model server for serving models"
 authors = ["Katanemo Labs, Inc <info@katanemo.com>"]
 license = "Apache 2.0"

From 04c7e5a17533f90f9d88fc8c175ce7d4f071ff36 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Fri, 25 Jul 2025 01:22:06 -0700
Subject: [PATCH 4/7] bug fix - allow image content to pass through (#539)

fixes https://github.com/katanemo/archgw/issues/535
---
 .../src/handlers/chat_completions.rs          |  2 +
 .../hermesllm/src/providers/openai/types.rs   | 64 +++++++++++++++++++
 demos/use_cases/llm_routing/arch_config.yaml  |  3 +
 3 files changed, 69 insertions(+)

diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs
index bd5cab79..37da961f 100644
--- a/crates/brightstaff/src/handlers/chat_completions.rs
+++ b/crates/brightstaff/src/handlers/chat_completions.rs
@@ -32,6 +32,8 @@ pub async fn chat_completions(
 
     let chat_request_bytes = request.collect().await?.to_bytes();
 
+    debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));
+
     let chat_request_parsed = serde_json::from_slice::<serde_json::Value>(&chat_request_bytes)
         .inspect_err(|err| {
             warn!(
diff --git a/crates/hermesllm/src/providers/openai/types.rs b/crates/hermesllm/src/providers/openai/types.rs
index d1c4430c..7dea64df 100644
--- a/crates/hermesllm/src/providers/openai/types.rs
+++ b/crates/hermesllm/src/providers/openai/types.rs
@@ -35,9 +35,16 @@ pub enum MultiPartContentType {
     ImageUrl,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct ImageUrl {
+    pub url: String,
+}
+
+#[skip_serializing_none]
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct MultiPartContent {
     pub text: Option<String>,
+    pub image_url: Option<ImageUrl>,
     #[serde(rename = "type")]
     pub content_type: MultiPartContentType,
 }
@@ -307,10 +314,12 @@ mod tests {
             MultiPartContent {
                 text: Some("This is a text part.".to_string()),
                 content_type: MultiPartContentType::Text,
+                image_url: None,
             },
             MultiPartContent {
                 text: Some("https://example.com/image.png".to_string()),
                 content_type: MultiPartContentType::ImageUrl,
+                image_url: None,
             },
         ]);
         assert_eq!(multi_part_content.to_string(), "This is a text part.");
@@ -364,6 +373,61 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_chat_completions_request_image_content() {
+        const CHAT_COMPLETIONS_REQUEST: &str = r#"
+            {
+              "stream": true,
+              "model": "openai/gpt-4o",
+              "messages": [
+                {
+                  "role": "user",
+                  "content": [
+                    {
+                      "type": "text",
+                      "text": "describe this photo pls"
+                    },
+                    {
+                      "type": "image_url",
+                      "image_url": {
+                        "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...=="
+                      }
+                    }
+                  ]
+                }
+              ]
+            }"#;
+
+        let chat_completions_request: ChatCompletionsRequest =
+            serde_json::from_str(CHAT_COMPLETIONS_REQUEST).unwrap();
+        assert_eq!(chat_completions_request.model, "openai/gpt-4o");
+        if let Some(ContentType::MultiPart(multi_part_content)) =
+            chat_completions_request.messages[0].content.as_ref()
+        {
+            assert_eq!(multi_part_content.len(), 2);
+            assert_eq!(
+                multi_part_content[0].content_type,
+                MultiPartContentType::Text
+            );
+            assert_eq!(
+                multi_part_content[0].text,
+                Some("describe this photo pls".to_string())
+            );
+            assert_eq!(
+                multi_part_content[1].content_type,
+                MultiPartContentType::ImageUrl
+            );
+            assert_eq!(
+                multi_part_content[1].image_url,
+                Some(ImageUrl {
+                    url: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...==".to_string(),
+                })
+            );
+        } else {
+            panic!("Expected MultiPartContent");
+        }
+    }
+
     #[test]
     fn test_sse_streaming() {
         let json_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1700000000,"model":"gpt-3.5-turbo","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml
index cb3a42e6..addaae66 100644
--- a/demos/use_cases/llm_routing/arch_config.yaml
+++ b/demos/use_cases/llm_routing/arch_config.yaml
@@ -12,6 +12,9 @@ llm_providers:
   - access_key: $OPENAI_API_KEY
     model: openai/gpt-4o-mini
 
+  - access_key: $OPENAI_API_KEY
+    model: openai/gpt-4.1
+
   - access_key: $OPENAI_API_KEY
     model: openai/gpt-4o
     default: true

From ac3fb4cb5babcc6e9a253d57a8f74c4301ae3af0 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Fri, 25 Jul 2025 19:08:24 -0700
Subject: [PATCH 5/7] release 0.3.7 (#542)

---
 .github/workflows/e2e_archgw.yml              |   2 +-
 .../workflows/e2e_test_currency_convert.yml   |   2 +-
 .../e2e_test_preference_based_routing.yml     |   2 +-
 .github/workflows/validate_arch_config.yml    |   2 +-
 README.md                                     |   4 +-
 arch/supervisord.conf                         |   4 +-
 arch/tools/README.md                          |   2 +-
 arch/tools/cli/consts.py                      |   2 +-
 arch/tools/poetry.lock                        |   4 +-
 arch/tools/pyproject.toml                     |   4 +-
 .../preference_based_routing/README.md        |   4 +-
 docs/source/conf.py                           |   2 +-
 docs/source/get_started/quickstart.rst        |   2 +-
 model_server/poetry.lock                      | 151 +++++++++---------
 model_server/pyproject.toml                   |   2 +-
 15 files changed, 95 insertions(+), 94 deletions(-)

diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml
index dccb196e..4c3bade3 100644
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6 -t katanemo/archgw:latest
+          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7 -t katanemo/archgw:latest
 
       - name: start archgw
         env:
diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml
index 3e0500f6..352245f0 100644
--- a/.github/workflows/e2e_test_currency_convert.yml
+++ b/.github/workflows/e2e_test_currency_convert.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml
index 3719449a..db8cb3d5 100644
--- a/.github/workflows/e2e_test_preference_based_routing.yml
+++ b/.github/workflows/e2e_test_preference_based_routing.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml
index e948e922..901600ab 100644
--- a/.github/workflows/validate_arch_config.yml
+++ b/.github/workflows/validate_arch_config.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.6
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.7
 
       - name: validate arch config
         run: |
diff --git a/README.md b/README.md
index 4848e9f8..f7481ee5 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 ```console
 $ python3.12 -m venv venv
 $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-$ pip install archgw==0.3.6
+$ pip install archgw==0.3.7
 ```
 
 ### Build Agentic Apps with Arch Gateway
@@ -148,7 +148,7 @@ endpoints:
 ```sh
 
 $ archgw up arch_config.yaml
-2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.6
+2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.7
 2024-12-05 16:56:28,485 - cli.utils - INFO - Schema validation successful!
 2024-12-05 16:56:28,485 - cli.main - INFO - Starting arch model server and arch gateway
 2024-12-05 16:56:51,647 - cli.core - INFO - Container is healthy!
diff --git a/arch/supervisord.conf b/arch/supervisord.conf
index dfb4d0d2..bec147cc 100644
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@@ -2,14 +2,14 @@
 nodaemon=true
 
 [program:brightstaff]
-command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log"
+command=sh -c "RUST_LOG=debug /app/brightstaff 2>&1 | tee /var/log/brightstaff.log"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
 stderr_logfile_maxbytes=0
 
 [program:envoy]
-command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info 2>&1 | tee /var/log//envoy.log"
+command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
diff --git a/arch/tools/README.md b/arch/tools/README.md
index 002e8939..7a33998e 100644
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@@ -19,7 +19,7 @@ source venv/bin/activate
 
 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.6
+pip install archgw==0.3.7
 ```
 
 ## Uninstall Instructions: archgw CLI
diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py
index db15afb3..213f0134 100644
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server"
 SERVICE_ALL = "all"
 MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.6")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.7")
diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock
index 7b078369..a8d5e85f 100644
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@@ -2,7 +2,7 @@
 
 [[package]]
 name = "archgw_modelserver"
-version = "0.3.6"
+version = "0.3.7"
 description = "A model server for serving models"
 optional = false
 python-versions = "*"
@@ -576,4 +576,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "d66f922881475b01508e74b362e554f8aa667c805b038d62311c2d99eaaef08d"
+content-hash = "1875c613e62e116d557ad2d30491891557b4114a99c7c65b22b26d690e9e268b"
diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml
index 0d903608..c62b8656 100644
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.3.6"
+version = "0.3.7"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@@ -10,7 +10,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-archgw_modelserver = "^0.3.6"
+archgw_modelserver = "^0.3.7"
 click = "^8.1.7"
 jinja2 = "^3.1.4"
 jsonschema = "^4.23.0"
diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md
index 3edd6854..1ac6b59b 100644
--- a/demos/use_cases/preference_based_routing/README.md
+++ b/demos/use_cases/preference_based_routing/README.md
@@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
 2. start archgw in the foreground
 ```bash
 (venv) $ archgw up --service archgw --foreground
-2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.6
+2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.7
 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.6
+2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.7
 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 449d136a..4e2fd142 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
 project = "Arch Docs"
 copyright = "2025, Katanemo Labs, Inc"
 author = "Katanemo Labs, Inc"
-release = " v0.3.6"
+release = " v0.3.7"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 7cd8308d..7027c67a 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 
    $ python -m venv venv
    $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-   $ pip install archgw==0.3.6
+   $ pip install archgw==0.3.7
 
 
 Build AI Agent with Arch Gateway
diff --git a/model_server/poetry.lock b/model_server/poetry.lock
index ae4ebdfb..26e65aed 100644
--- a/model_server/poetry.lock
+++ b/model_server/poetry.lock
@@ -391,66 +391,66 @@ grpc = ["grpcio (>=1.44.0,<2.0.0)"]
 
 [[package]]
 name = "grpcio"
-version = "1.73.1"
+version = "1.74.0"
 description = "HTTP/2-based RPC framework"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"},
-    {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"},
-    {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"},
-    {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"},
-    {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"},
-    {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"},
-    {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"},
-    {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"},
-    {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"},
-    {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"},
-    {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"},
-    {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"},
-    {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"},
-    {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"},
-    {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"},
-    {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"},
-    {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"},
-    {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"},
-    {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = "sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"},
-    {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"},
-    {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"},
-    {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"},
-    {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"},
-    {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"},
-    {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"},
-    {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"},
-    {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"},
-    {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"},
-    {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"},
-    {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"},
-    {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"},
-    {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"},
-    {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"},
-    {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"},
-    {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"},
-    {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"},
-    {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"},
-    {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"},
-    {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"},
-    {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"},
-    {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"},
-    {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"},
-    {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"},
-    {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"},
-    {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"},
-    {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"},
-    {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"},
-    {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"},
-    {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"},
-    {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"},
-    {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"},
+    {file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"},
+    {file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"},
+    {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e154d230dc1bbbd78ad2fdc3039fa50ad7ffcf438e4eb2fa30bce223a70c7486"},
+    {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8978003816c7b9eabe217f88c78bc26adc8f9304bf6a594b02e5a49b2ef9c11"},
+    {file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3d7bd6e3929fd2ea7fbc3f562e4987229ead70c9ae5f01501a46701e08f1ad9"},
+    {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:136b53c91ac1d02c8c24201bfdeb56f8b3ac3278668cbb8e0ba49c88069e1bdc"},
+    {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fe0f540750a13fd8e5da4b3eaba91a785eea8dca5ccd2bc2ffe978caa403090e"},
+    {file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4e4181bfc24413d1e3a37a0b7889bea68d973d4b45dd2bc68bb766c140718f82"},
+    {file = "grpcio-1.74.0-cp310-cp310-win32.whl", hash = "sha256:1733969040989f7acc3d94c22f55b4a9501a30f6aaacdbccfaba0a3ffb255ab7"},
+    {file = "grpcio-1.74.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e912d3c993a29df6c627459af58975b2e5c897d93287939b9d5065f000249b5"},
+    {file = "grpcio-1.74.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:69e1a8180868a2576f02356565f16635b99088da7df3d45aaa7e24e73a054e31"},
+    {file = "grpcio-1.74.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8efe72fde5500f47aca1ef59495cb59c885afe04ac89dd11d810f2de87d935d4"},
+    {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a8f0302f9ac4e9923f98d8e243939a6fb627cd048f5cd38595c97e38020dffce"},
+    {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f609a39f62a6f6f05c7512746798282546358a37ea93c1fcbadf8b2fed162e3"},
+    {file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98e0b7434a7fa4e3e63f250456eaef52499fba5ae661c58cc5b5477d11e7182"},
+    {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:662456c4513e298db6d7bd9c3b8df6f75f8752f0ba01fb653e252ed4a59b5a5d"},
+    {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3d14e3c4d65e19d8430a4e28ceb71ace4728776fd6c3ce34016947474479683f"},
+    {file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bf949792cee20d2078323a9b02bacbbae002b9e3b9e2433f2741c15bdeba1c4"},
+    {file = "grpcio-1.74.0-cp311-cp311-win32.whl", hash = "sha256:55b453812fa7c7ce2f5c88be3018fb4a490519b6ce80788d5913f3f9d7da8c7b"},
+    {file = "grpcio-1.74.0-cp311-cp311-win_amd64.whl", hash = "sha256:86ad489db097141a907c559988c29718719aa3e13370d40e20506f11b4de0d11"},
+    {file = "grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8"},
+    {file = "grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6"},
+    {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5"},
+    {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49"},
+    {file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7"},
+    {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3"},
+    {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707"},
+    {file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b"},
+    {file = "grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c"},
+    {file = "grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc"},
+    {file = "grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89"},
+    {file = "grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01"},
+    {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e"},
+    {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91"},
+    {file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249"},
+    {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362"},
+    {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f"},
+    {file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20"},
+    {file = "grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa"},
+    {file = "grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24"},
+    {file = "grpcio-1.74.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:4bc5fca10aaf74779081e16c2bcc3d5ec643ffd528d9e7b1c9039000ead73bae"},
+    {file = "grpcio-1.74.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:6bab67d15ad617aff094c382c882e0177637da73cbc5532d52c07b4ee887a87b"},
+    {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:655726919b75ab3c34cdad39da5c530ac6fa32696fb23119e36b64adcfca174a"},
+    {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a2b06afe2e50ebfd46247ac3ba60cac523f54ec7792ae9ba6073c12daf26f0a"},
+    {file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f251c355167b2360537cf17bea2cf0197995e551ab9da6a0a59b3da5e8704f9"},
+    {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8f7b5882fb50632ab1e48cb3122d6df55b9afabc265582808036b6e51b9fd6b7"},
+    {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:834988b6c34515545b3edd13e902c1acdd9f2465d386ea5143fb558f153a7176"},
+    {file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:22b834cef33429ca6cc28303c9c327ba9a3fafecbf62fae17e9a7b7163cc43ac"},
+    {file = "grpcio-1.74.0-cp39-cp39-win32.whl", hash = "sha256:7d95d71ff35291bab3f1c52f52f474c632db26ea12700c2ff0ea0532cb0b5854"},
+    {file = "grpcio-1.74.0-cp39-cp39-win_amd64.whl", hash = "sha256:ecde9ab49f58433abe02f9ed076c7b5be839cf0153883a6d23995937a82392fa"},
+    {file = "grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1"},
 ]
 
 [package.extras]
-protobuf = ["grpcio-tools (>=1.73.1)"]
+protobuf = ["grpcio-tools (>=1.74.0)"]
 
 [[package]]
 name = "h11"
@@ -531,19 +531,19 @@ zstd = ["zstandard (>=0.18.0)"]
 
 [[package]]
 name = "huggingface-hub"
-version = "0.33.4"
+version = "0.34.1"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "huggingface_hub-0.33.4-py3-none-any.whl", hash = "sha256:09f9f4e7ca62547c70f8b82767eefadd2667f4e116acba2e3e62a5a81815a7bb"},
-    {file = "huggingface_hub-0.33.4.tar.gz", hash = "sha256:6af13478deae120e765bfd92adad0ae1aec1ad8c439b46f23058ad5956cbca0a"},
+    {file = "huggingface_hub-0.34.1-py3-none-any.whl", hash = "sha256:60d843dcb7bc335145b20e7d2f1dfe93910f6787b2b38a936fb772ce2a83757c"},
+    {file = "huggingface_hub-0.34.1.tar.gz", hash = "sha256:6978ed89ef981de3c78b75bab100a214843be1cc9d24f8e9c0dc4971808ef1b1"},
 ]
 
 [package.dependencies]
 filelock = "*"
 fsspec = ">=2023.5.0"
-hf-xet = {version = ">=1.1.2,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""}
+hf-xet = {version = ">=1.1.3,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""}
 packaging = ">=20.9"
 pyyaml = ">=5.1"
 requests = "*"
@@ -551,16 +551,16 @@ tqdm = ">=4.42.1"
 typing-extensions = ">=3.7.4.3"
 
 [package.extras]
-all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 cli = ["InquirerPy (==0.3.4)"]
-dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
 hf-transfer = ["hf-transfer (>=0.1.4)"]
 hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"]
 inference = ["aiohttp"]
 mcp = ["aiohttp", "mcp (>=1.8.0)", "typer"]
 oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"]
-quality = ["libcst (==1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "ruff (>=0.9.0)"]
+quality = ["libcst (>=1.4.0)", "mypy (==1.15.0)", "mypy (>=1.14.1,<1.15.0)", "ruff (>=0.9.0)"]
 tensorflow = ["graphviz", "pydot", "tensorflow"]
 tensorflow-testing = ["keras (<3.0)", "tensorflow"]
 testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
@@ -2031,18 +2031,18 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "4.53.3"
+version = "4.54.0"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.9.0"
 files = [
-    {file = "transformers-4.53.3-py3-none-any.whl", hash = "sha256:5aba81c92095806b6baf12df35d756cf23b66c356975fb2a7fa9e536138d7c75"},
-    {file = "transformers-4.53.3.tar.gz", hash = "sha256:b2eda1a261de79b78b97f7888fe2005fc0c3fabf5dad33d52cc02983f9f675d8"},
+    {file = "transformers-4.54.0-py3-none-any.whl", hash = "sha256:c96e607f848625965b76c677b2c2576f2c7b7097c1c5292b281919d90675a25e"},
+    {file = "transformers-4.54.0.tar.gz", hash = "sha256:843da4d66a573cef3d1b2e7a1d767e77da054621e69d9f3faff761e55a1f8203"},
 ]
 
 [package.dependencies]
 filelock = "*"
-huggingface-hub = ">=0.30.0,<1.0"
+huggingface-hub = ">=0.34.0,<1.0"
 numpy = ">=1.17"
 packaging = ">=20.0"
 pyyaml = ">=5.1"
@@ -2054,15 +2054,15 @@ tqdm = ">=4.27"
 
 [package.extras]
 accelerate = ["accelerate (>=0.26.0)"]
-all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "librosa", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision"]
+all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 benchmark = ["optimum-benchmark (>=0.3.0)"]
 codecarbon = ["codecarbon (>=2.8.1)"]
 deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
-deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=1.0.11)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.21,<0.22)", "urllib3 (<2.0.0)"]
+dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<0.7)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"]
 flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
 flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 ftfy = ["ftfy"]
@@ -2070,6 +2070,7 @@ hf-xet = ["hf_xet"]
 hub-kernels = ["kernels (>=0.6.1,<0.7)"]
 integrations = ["kernels (>=0.6.1,<0.7)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"]
 ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"]
+mistral-common = ["mistral-common[opencv] (>=1.6.3)"]
 modelcreation = ["cookiecutter (==1.7.3)"]
 natten = ["natten (>=0.14.6,<0.15.0)"]
 num2words = ["num2words"]
@@ -2077,27 +2078,27 @@ onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1
 onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
 open-telemetry = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
 optuna = ["optuna"]
-quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.11.2)", "urllib3 (<2.0.0)"]
+quality = ["GitPython (<3.1.19)", "datasets (>=2.15.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.11.2)", "urllib3 (<2.0.0)"]
 ray = ["ray[tune] (>=2.7.0)"]
-retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
+retrieval = ["datasets (>=2.15.0)", "faiss-cpu"]
 ruff = ["ruff (==0.11.2)"]
 sagemaker = ["sagemaker (>=2.31.0)"]
 sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
-serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
+serving = ["accelerate (>=0.26.0)", "fastapi", "openai", "pydantic (>=2)", "starlette", "torch (>=2.1)", "uvicorn"]
 sigopt = ["sigopt"]
 sklearn = ["scikit-learn"]
 speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
-testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
 tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
 tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"]
 tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 tiktoken = ["blobfile", "tiktoken"]
-timm = ["timm (<=1.0.11)"]
+timm = ["timm (!=1.0.18,<=1.0.19)"]
 tokenizers = ["tokenizers (>=0.21,<0.22)"]
 torch = ["accelerate (>=0.26.0)", "torch (>=2.1)"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.30.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "tqdm (>=4.27)"]
+torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.21,<0.22)", "torch (>=2.1)", "tqdm (>=4.27)"]
 video = ["av"]
 vision = ["Pillow (>=10.0.1,<=15.0)"]
 
diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml
index c36f1033..7abe7a40 100644
--- a/model_server/pyproject.toml
+++ b/model_server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw_modelserver"
-version = "0.3.6"
+version = "0.3.7"
 description = "A model server for serving models"
 authors = ["Katanemo Labs, Inc <info@katanemo.com>"]
 license = "Apache 2.0"

From d2157248649d8a3720d174113f82a8598629bb04 Mon Sep 17 00:00:00 2001
From: Musa <malikmusa1323@gmail.com>
Date: Sun, 27 Jul 2025 09:26:12 -0700
Subject: [PATCH 6/7] Update llm_provider.rst (#543)

---
 docs/source/concepts/llm_provider.rst | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/docs/source/concepts/llm_provider.rst b/docs/source/concepts/llm_provider.rst
index 1c4b2aaf..eabdaa96 100644
--- a/docs/source/concepts/llm_provider.rst
+++ b/docs/source/concepts/llm_provider.rst
@@ -38,27 +38,29 @@ Adding custom LLM Provider
 We support any OpenAI compliant LLM for example mistral, openai, ollama etc. We also offer first class support for OpenAI, Anthropic, DeepSeek, Mistral, Groq, and Ollama based models.
 You can easily configure an LLM that communicates over the OpenAI API interface, by following the below guide.
 
-For example following code block shows you how to add an ollama-supported LLM in the `arch_config.yaml` file.
+For example, the following code block shows how to add an Ollama-supported LLM in the ``arch_config.yaml`` file.
 
 .. code-block:: yaml
-    llm_providers:
-    - model: some_custom_llm_provider/llama3.2
-      provider_interface: openai
-      base_url: http://host.docker.internal:11434
 
-And in the following code block shows you how to add mistral llm provider in the `arch_config.yaml` file.
+    llm_providers:
+      - model: some_custom_llm_provider/llama3.2
+        provider_interface: openai
+        base_url: http://host.docker.internal:11434
+
+The following code block shows how to add the Mistral LLM provider in the ``arch_config.yaml`` file.
 
 .. code-block:: yaml
+
     llm_providers:
-    - name: mistral/ministral-3b-latest
-      access_key: $MISTRAL_API_KEY
+      - name: mistral/ministral-3b-latest
+        access_key: $MISTRAL_API_KEY
 
 Example: Using the OpenAI Python SDK
 ------------------------------------
 
 .. code-block:: python
 
-   from openai import OpenAI
+    from openai import OpenAI
 
     # Initialize the Arch client
     client = OpenAI(base_url="http://127.0.0.1:2000/")

From 5d5762ac20a7a2e23630da5d41018d00992304a8 Mon Sep 17 00:00:00 2001
From: Matt Kafonek <kafonek@gmail.com>
Date: Mon, 28 Jul 2025 03:48:35 -0400
Subject: [PATCH 7/7] swap out python to sys.executable to support uv tool
 install workflows (#544)

---
 model_server/src/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/model_server/src/cli.py b/model_server/src/cli.py
index 5245873e..d0d9e9e1 100644
--- a/model_server/src/cli.py
+++ b/model_server/src/cli.py
@@ -72,7 +72,7 @@ def start_server(port=51000, foreground=False):
     if foreground:
         process = subprocess.Popen(
             [
-                "python",
+                sys.executable,
                 "-m",
                 "uvicorn",
                 "src.main:app",
@@ -85,7 +85,7 @@ def start_server(port=51000, foreground=False):
     else:
         process = subprocess.Popen(
             [
-                "python",
+                sys.executable,
                 "-m",
                 "uvicorn",
                 "src.main:app",