From e3ddd92f033c12bf40ad250a6235cb1157ac6627 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Fri, 13 Jun 2025 15:29:58 -0700 Subject: [PATCH] release 0.3.2 --- .github/workflows/e2e_archgw.yml | 2 +- .github/workflows/e2e_test_currency_convert.yml | 2 +- .../workflows/e2e_test_preference_based_routing.yml | 2 +- .github/workflows/validate_arch_config.yml | 2 +- README.md | 2 +- arch/tools/README.md | 2 +- arch/tools/cli/consts.py | 2 +- arch/tools/pyproject.toml | 4 ++-- build_filter_image.sh | 2 +- crates/common/src/tokenizer.rs | 12 +++++++++--- demos/use_cases/preference_based_routing/README.md | 4 ++-- .../preference_based_routing/arch_config.yaml | 12 ++++++------ docs/source/conf.py | 2 +- docs/source/get_started/quickstart.rst | 2 +- model_server/pyproject.toml | 2 +- 15 files changed, 30 insertions(+), 24 deletions(-) diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml index b4c6d4ed..1cdc3378 100644 --- a/.github/workflows/e2e_archgw.yml +++ b/.github/workflows/e2e_archgw.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 -t katanemo/archgw:latest + cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 -t katanemo/archgw:latest - name: start archgw env: diff --git a/.github/workflows/e2e_test_currency_convert.yml b/.github/workflows/e2e_test_currency_convert.yml index 00f1d038..2aae62f7 100644 --- a/.github/workflows/e2e_test_currency_convert.yml +++ b/.github/workflows/e2e_test_currency_convert.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 - name: install poetry run: | diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml index de416c7d..8d581270 100644 --- a/.github/workflows/e2e_test_preference_based_routing.yml +++ b/.github/workflows/e2e_test_preference_based_routing.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 - name: install poetry run: | diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml index e1d85747..f58547e9 100644 --- a/.github/workflows/validate_arch_config.yml +++ b/.github/workflows/validate_arch_config.yml @@ -24,7 +24,7 @@ jobs: - name: build arch docker image run: | - docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 + docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 - name: validate arch config run: | diff --git a/README.md b/README.md index e4edf530..736dfcb2 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. ```console $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate -$ pip install archgw==0.3.1 +$ pip install archgw==0.3.2 ``` ### Build AI Agent with Arch Gateway diff --git a/arch/tools/README.md b/arch/tools/README.md index d53e901a..b72c0db9 100644 --- a/arch/tools/README.md +++ b/arch/tools/README.md @@ -19,7 +19,7 @@ source venv/bin/activate ### Step 3: Run the build script ```bash -pip install archgw==0.3.1 +pip install archgw==0.3.2 ``` ## Uninstall Instructions: archgw CLI diff --git a/arch/tools/cli/consts.py b/arch/tools/cli/consts.py index ed0efad5..ffe4d886 100644 --- a/arch/tools/cli/consts.py +++ b/arch/tools/cli/consts.py @@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server" SERVICE_ALL = "all" MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log" ARCHGW_DOCKER_NAME = "archgw" -ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.1") +ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.2") diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index 467b866d..6b68b315 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw" -version = "0.3.1" +version = "0.3.2" description = "Python-based CLI tool to manage Arch Gateway." authors = ["Katanemo Labs, Inc."] packages = [ @@ -10,7 +10,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" -archgw_modelserver = "^0.3.1" +archgw_modelserver = "^0.3.2" click = "^8.1.7" jinja2 = "^3.1.4" jsonschema = "^4.23.0" diff --git a/build_filter_image.sh b/build_filter_image.sh index 29413cde..ea3c5c31 100644 --- a/build_filter_image.sh +++ b/build_filter_image.sh @@ -1 +1 @@ -docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.1 +docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2 diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index c98dfdfe..ded885d5 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -6,15 +6,21 @@ pub fn token_count(model_name: &str, text: &str) -> Result { //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 - let updated_model = match model_name.starts_with("gpt") { + let updated_model = match model_name.starts_with("gpt-4") { false => { debug!( "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count", model_name ); - "gpt-4" + "gpt-4o" + } + true => { + if model_name.starts_with("gpt-4.1") { + "gpt-4o" + } else { + model_name + } } - true => model_name, }; // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton? diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md index 8883d2b3..84a71504 100644 --- a/demos/use_cases/preference_based_routing/README.md +++ b/demos/use_cases/preference_based_routing/README.md @@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https 2. start archgw in the foreground ```bash (venv) $ archgw up --service archgw --foreground -2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.1 +2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.2 2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml -2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.1 +2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.2 2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting 2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting 2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy! diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index 41026481..9e5d6043 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -22,17 +22,17 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o-mini - - name: gpt-4o + - name: gpt-4.1 provider_interface: openai access_key: $OPENAI_API_KEY - model: gpt-4o + model: gpt-4.1 default: true - name: code_generation - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o - usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements + access_key: $ANTHROPY_API_KEY + provider_interface: claude + model: claude-3-7-sonnet-latest + usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements - name: code_understanding provider_interface: openai diff --git a/docs/source/conf.py b/docs/source/conf.py index 3a351b35..2175d291 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Arch Docs" copyright = "2025, Katanemo Labs, Inc" author = "Katanemo Labs, Inc" -release = " v0.3.1" +release = " v0.3.2" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 270d6799..560ec639 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install archgw==0.3.1 + $ pip install archgw==0.3.2 Build AI Agent with Arch Gateway diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml index 43702b01..fca57d76 100644 --- a/model_server/pyproject.toml +++ b/model_server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw_modelserver" -version = "0.3.1" +version = "0.3.2" description = "A model server for serving models" authors = ["Katanemo Labs, Inc "] license = "Apache 2.0"