From e3ddd92f033c12bf40ad250a6235cb1157ac6627 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Fri, 13 Jun 2025 15:29:58 -0700
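Subject: [PATCH] release 0.3.2

Bump the version from 0.3.1 to 0.3.2 across CI workflows, docs, the CLI
default image tag, and both Python packages.

Also included in this release:
- tokenizer: only model names starting with "gpt-4" are passed through to
  tiktoken_rs; "gpt-4.1*" and every other model name fall back to "gpt-4o"
  for token counting.
- preference_based_routing demo: the default provider is now gpt-4.1 and
  code_generation is routed to claude-3-7-sonnet-latest.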

---
 .github/workflows/e2e_archgw.yml                     |  2 +-
 .github/workflows/e2e_test_currency_convert.yml      |  2 +-
 .../workflows/e2e_test_preference_based_routing.yml  |  2 +-
 .github/workflows/validate_arch_config.yml           |  2 +-
 README.md                                            |  2 +-
 arch/tools/README.md                                 |  2 +-
 arch/tools/cli/consts.py                             |  2 +-
 arch/tools/pyproject.toml                            |  4 ++--
 build_filter_image.sh                                |  2 +-
 crates/common/src/tokenizer.rs                       | 12 +++++++++---
 demos/use_cases/preference_based_routing/README.md   |  4 ++--
 .../preference_based_routing/arch_config.yaml        | 12 ++++++------
 docs/source/conf.py                                  |  2 +-
 docs/source/get_started/quickstart.rst               |  2 +-
 model_server/pyproject.toml                          |  2 +-
 15 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml
index b4c6d4ed..1cdc3378 100644
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 -t katanemo/archgw:latest
+          cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 -t katanemo/archgw:latest
 
       - name: start archgw
         env:
diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml
index 00f1d038..2aae62f7 100644
--- a/.github/workflows/e2e_test_currency_convert.yml
+++ b/.github/workflows/e2e_test_currency_convert.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml
index de416c7d..8d581270 100644
--- a/.github/workflows/e2e_test_preference_based_routing.yml
+++ b/.github/workflows/e2e_test_preference_based_routing.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
 
       - name: install poetry
         run: |
diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml
index e1d85747..f58547e9 100644
--- a/.github/workflows/validate_arch_config.yml
+++ b/.github/workflows/validate_arch_config.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: build arch docker image
         run: |
-          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1
+          docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
 
       - name: validate arch config
         run: |
diff --git a/README.md b/README.md
index e4edf530..736dfcb2 100644
--- a/README.md
+++ b/README.md
@@ -81,7 +81,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 ```console
 $ python -m venv venv
 $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-$ pip install archgw==0.3.1
+$ pip install archgw==0.3.2
 ```
 
 ### Build AI Agent with Arch Gateway
diff --git a/arch/tools/README.md b/arch/tools/README.md
index d53e901a..b72c0db9 100644
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@@ -19,7 +19,7 @@ source venv/bin/activate
 
 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.1
+pip install archgw==0.3.2
 ```
 
 ## Uninstall Instructions: archgw CLI
diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py
index ed0efad5..ffe4d886 100644
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server"
 SERVICE_ALL = "all"
 MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.1")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.2")
diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml
index 467b866d..6b68b315 100644
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.3.1"
+version = "0.3.2"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@@ -10,7 +10,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-archgw_modelserver = "^0.3.1"
+archgw_modelserver = "^0.3.2"
 click = "^8.1.7"
 jinja2 = "^3.1.4"
 jsonschema = "^4.23.0"
diff --git a/build_filter_image.sh b/build_filter_image.sh
index 29413cde..ea3c5c31 100644
--- a/build_filter_image.sh
+++ b/build_filter_image.sh
@@ -1 +1 @@
-docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1
+docker build  -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs
index c98dfdfe..ded885d5 100644
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@@ -6,15 +6,21 @@ pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
     //HACK: add support for tokenizing mistral and other models
     //filed issue https://github.com/katanemo/arch/issues/222
 
-    let updated_model = match model_name.starts_with("gpt") {
+    let updated_model = match model_name.starts_with("gpt-4") {
         false => {
             debug!(
-                "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count",
+                "tiktoken_rs: unsupported model: {}, using gpt-4o to compute token count",
                 model_name
             );
-            "gpt-4"
+            "gpt-4o"
+        }
+        true => {
+            if model_name.starts_with("gpt-4.1") {
+                "gpt-4o"
+            } else {
+                model_name
+            }
         }
-        true => model_name,
     };
 
     // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?
diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md
index 8883d2b3..84a71504 100644
--- a/demos/use_cases/preference_based_routing/README.md
+++ b/demos/use_cases/preference_based_routing/README.md
@@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
 2. start archgw in the foreground
 ```bash
 (venv) $ archgw up --service archgw --foreground
-2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.1
+2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.2
 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.1
+2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.2
 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml
index 41026481..9e5d6043 100644
--- a/demos/use_cases/preference_based_routing/arch_config.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config.yaml
@@ -22,17 +22,17 @@ llm_providers:
     access_key: $OPENAI_API_KEY
     model: gpt-4o-mini
 
-  - name: gpt-4o
+  - name: gpt-4.1
     provider_interface: openai
     access_key: $OPENAI_API_KEY
-    model: gpt-4o
+    model: gpt-4.1
     default: true
 
   - name: code_generation
-    provider_interface: openai
-    access_key: $OPENAI_API_KEY
-    model: gpt-4o
-    usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements
+    access_key: $ANTHROPIC_API_KEY
+    provider_interface: claude
+    model: claude-3-7-sonnet-latest
+    usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
 
   - name: code_understanding
     provider_interface: openai
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3a351b35..2175d291 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
 project = "Arch Docs"
 copyright = "2025, Katanemo Labs, Inc"
 author = "Katanemo Labs, Inc"
-release = " v0.3.1"
+release = " v0.3.2"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 270d6799..560ec639 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 
    $ python -m venv venv
    $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-   $ pip install archgw==0.3.1
+   $ pip install archgw==0.3.2
 
 
 Build AI Agent with Arch Gateway
diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml
index 43702b01..fca57d76 100644
--- a/model_server/pyproject.toml
+++ b/model_server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw_modelserver"
-version = "0.3.1"
+version = "0.3.2"
 description = "A model server for serving models"
 authors = ["Katanemo Labs, Inc <info@katanemo.com>"]
 license = "Apache 2.0"