mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Merge branch 'main' into adil/agent_format_demo
This commit is contained in:
commit
c3c161ca4c
20 changed files with 833 additions and 629 deletions
2
.github/workflows/e2e_archgw.yml
vendored
2
.github/workflows/e2e_archgw.yml
vendored
|
|
@ -30,7 +30,7 @@ jobs:
|
|||
|
||||
- name: build arch docker image
|
||||
run: |
|
||||
cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.15 -t katanemo/archgw:latest
|
||||
cd ../../ && docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.16 -t katanemo/archgw:latest
|
||||
|
||||
- name: start archgw
|
||||
env:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
|
||||
- name: build arch docker image
|
||||
run: |
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.15
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.16
|
||||
|
||||
- name: install poetry
|
||||
run: |
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
|
||||
- name: build arch docker image
|
||||
run: |
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.15
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.16
|
||||
|
||||
- name: install poetry
|
||||
run: |
|
||||
|
|
|
|||
2
.github/workflows/validate_arch_config.yml
vendored
2
.github/workflows/validate_arch_config.yml
vendored
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
|
||||
- name: build arch docker image
|
||||
run: |
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.15
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.16
|
||||
|
||||
- name: validate arch config
|
||||
run: |
|
||||
|
|
|
|||
10
README.md
10
README.md
|
|
@ -4,8 +4,8 @@
|
|||
<div align="center">
|
||||
|
||||
|
||||
_Arch is a smart edge and LLM proxy server for agents._<br><br>
|
||||
Arch handles the *pesky low-level work* in building agentic apps — like applying guardrails, clarifying vague user input, routing prompts to the right agent, and unifying access to any LLM. It’s a language and framework friendly infrastructure layer designed to help you build and ship agentic apps faster.
|
||||
_Arch is a models-native (edge and service) proxy server for agents._<br><br>
|
||||
Arch handles the *pesky plumbing work* in building AI agents — like applying guardrails, routing prompts to the right agent, generating hyper-rich information traces for RL, and unifying access to any LLM. It’s a language and framework friendly infrastructure layer designed to help you build and ship agentic apps faster.
|
||||
|
||||
|
||||
[Quickstart](#Quickstart) •
|
||||
|
|
@ -24,7 +24,7 @@ _Arch is a smart edge and LLM proxy server for agents._<br><br>
|
|||
</div>
|
||||
|
||||
# About The Latest Release:
|
||||
[0.3.15] [Preference-aware multi LLM routing for Claude Code 2.0](demos/use_cases/claude_code_router/README.md) <br><img src="docs/source/_static/img/claude_code_router.png" alt="high-level network architecture for ArchGW" width="50%">
|
||||
[0.3.16] [Preference-aware multi LLM routing for Claude Code 2.0](demos/use_cases/claude_code_router/README.md) <br><img src="docs/source/_static/img/claude_code_router.png" alt="high-level network architecture for ArchGW" width="50%">
|
||||
|
||||
|
||||
# Overview
|
||||
|
|
@ -87,7 +87,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
|
|||
```console
|
||||
$ python3.12 -m venv venv
|
||||
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
|
||||
$ pip install archgw==0.3.15
|
||||
$ pip install archgw==0.3.16
|
||||
```
|
||||
|
||||
### Use Arch as a LLM Router
|
||||
|
|
@ -276,7 +276,7 @@ endpoints:
|
|||
```sh
|
||||
|
||||
$ archgw up arch_config.yaml
|
||||
2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.15
|
||||
2024-12-05 16:56:27,979 - cli.main - INFO - Starting archgw cli version: 0.3.16
|
||||
2024-12-05 16:56:28,485 - cli.utils - INFO - Schema validation successful!
|
||||
2024-12-05 16:56:28,485 - cli.main - INFO - Starting arch model server and arch gateway
|
||||
2024-12-05 16:56:51,647 - cli.core - INFO - Container is healthy!
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ source venv/bin/activate
|
|||
|
||||
### Step 3: Run the build script
|
||||
```bash
|
||||
pip install archgw==0.3.15
|
||||
pip install archgw==0.3.16
|
||||
```
|
||||
|
||||
## Uninstall Instructions: archgw CLI
|
||||
|
|
|
|||
|
|
@ -10,4 +10,4 @@ SERVICE_NAME_MODEL_SERVER = "model_server"
|
|||
SERVICE_ALL = "all"
|
||||
MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
|
||||
ARCHGW_DOCKER_NAME = "archgw"
|
||||
ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.15")
|
||||
ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.16")
|
||||
|
|
|
|||
|
|
@ -35,8 +35,6 @@ def _get_gateway_ports(arch_config_file: str) -> list[int]:
|
|||
with open(arch_config_file) as f:
|
||||
arch_config_dict = yaml.safe_load(f)
|
||||
|
||||
print("arch config dict json string: ", json.dumps(arch_config_dict))
|
||||
|
||||
listeners, _, _ = convert_legacy_listeners(
|
||||
arch_config_dict.get("listeners"), arch_config_dict.get("llm_providers")
|
||||
)
|
||||
|
|
@ -82,15 +80,11 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
while True:
|
||||
all_listeners_healthy = True
|
||||
for port in gateway_ports:
|
||||
log.info(f"Checking health endpoint on port {port}")
|
||||
health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{port}/healthz"
|
||||
)
|
||||
if health_check_status:
|
||||
log.info(f"Gateway on port {port} is healthy!")
|
||||
else:
|
||||
if not health_check_status:
|
||||
all_listeners_healthy = False
|
||||
log.info(f"Gateway on port {port} is not healthy yet.")
|
||||
|
||||
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
current_time = time.time()
|
||||
|
|
@ -111,7 +105,12 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
log.info("archgw is running and is healthy!")
|
||||
break
|
||||
else:
|
||||
log.info(f"archgw status: {archgw_status}, health status: starting")
|
||||
health_check_status_str = (
|
||||
"healthy" if health_check_status else "not healthy"
|
||||
)
|
||||
log.info(
|
||||
f"archgw status: {archgw_status}, health status: {health_check_status_str}"
|
||||
)
|
||||
time.sleep(1)
|
||||
|
||||
if foreground:
|
||||
|
|
|
|||
112
arch/tools/poetry.lock
generated
112
arch/tools/poetry.lock
generated
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
[[package]]
|
||||
name = "archgw_modelserver"
|
||||
version = "0.3.15"
|
||||
version = "0.3.16"
|
||||
description = "A model server for serving models"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
|
@ -15,23 +15,15 @@ url = "../../model_server"
|
|||
|
||||
[[package]]
|
||||
name = "attrs"
|
||||
version = "25.3.0"
|
||||
version = "25.4.0"
|
||||
description = "Classes Without Boilerplate"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"},
|
||||
{file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"},
|
||||
{file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"},
|
||||
{file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||
cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||
dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||
docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"]
|
||||
tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||
tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.3.0"
|
||||
|
|
@ -76,13 +68,13 @@ test = ["pytest (>=6)"]
|
|||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.1.0"
|
||||
version = "2.3.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.10"
|
||||
files = [
|
||||
{file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
|
||||
{file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
|
||||
{file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
|
||||
{file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -382,13 +374,13 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.36.2"
|
||||
version = "0.37.0"
|
||||
description = "JSON Referencing + Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.10"
|
||||
files = [
|
||||
{file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"},
|
||||
{file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"},
|
||||
{file = "referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231"},
|
||||
{file = "referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -582,43 +574,53 @@ type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12
|
|||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.2.1"
|
||||
version = "2.3.0"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
|
||||
{file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
|
||||
{file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
|
||||
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
|
||||
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
|
||||
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
|
||||
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
|
||||
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
|
||||
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
|
||||
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -635,4 +637,4 @@ files = [
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "70deeecce65e4760e1a50c3d232004842cbd6397ef45163d315269ac7de8e9fc"
|
||||
content-hash = "e354a32ce728458172faf8b95f36b3d4070f5055dbe6c9606d20882b45474c8c"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "archgw"
|
||||
version = "0.3.15"
|
||||
version = "0.3.16"
|
||||
description = "Python-based CLI tool to manage Arch Gateway."
|
||||
authors = ["Katanemo Labs, Inc."]
|
||||
packages = [
|
||||
|
|
@ -10,7 +10,7 @@ readme = "README.md"
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
archgw_modelserver = "^0.3.15"
|
||||
archgw_modelserver = "^0.3.16"
|
||||
click = "^8.1.7"
|
||||
jinja2 = "^3.1.4"
|
||||
jsonschema = "^4.23.0"
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ python = ">=3.10,<3.13.3"
|
|||
pydantic = "^2.0"
|
||||
openai = "^1.0"
|
||||
pyyaml = "^6.0"
|
||||
archgw ="^0.3.15"
|
||||
archgw ="^0.3.16"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^8.3"
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
|
|||
2. start archgw in the foreground
|
||||
```bash
|
||||
(venv) $ archgw up --service archgw --foreground
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.15
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.16
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
|
||||
2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.15
|
||||
2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.16
|
||||
2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
|
||||
2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
|
||||
2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 217 KiB After Width: | Height: | Size: 181 KiB |
|
|
@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
|
|||
project = "Arch Docs"
|
||||
copyright = "2025, Katanemo Labs, Inc"
|
||||
author = "Katanemo Labs, Inc"
|
||||
release = " v0.3.15"
|
||||
release = " v0.3.16"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ Prerequisites
|
|||
Before you begin, ensure you have the following:
|
||||
|
||||
1. `Docker System <https://docs.docker.com/get-started/get-docker/>`_ (v24)
|
||||
2. `Docker compose <https://docs.docker.com/compose/install/>`_ (v2.29)
|
||||
3. `Python <https://www.python.org/downloads/>`_ (v3.12)
|
||||
2. `Docker Compose <https://docs.docker.com/compose/install/>`_ (v2.29)
|
||||
3. `Python <https://www.python.org/downloads/>`_ (v3.10+)
|
||||
|
||||
Arch's CLI allows you to manage and interact with the Arch gateway efficiently. To install the CLI, simply run the following command:
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
|
|||
|
||||
$ python -m venv venv
|
||||
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
|
||||
$ pip install archgw==0.3.15
|
||||
$ pip install archgw==0.3.16
|
||||
|
||||
|
||||
Build AI Agent with Arch Gateway
|
||||
|
|
|
|||
|
|
@ -7,14 +7,9 @@ Welcome to Arch!
|
|||
|
||||
.. raw:: html
|
||||
|
||||
<div style="text-align: center; font-size: 1.25rem;">
|
||||
<br>
|
||||
<p>Build <strong>faster</strong>, <strong>multi-LLM</strong> agents for the <strong>enterprise</strong>.</p>
|
||||
</div>
|
||||
|
||||
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch-3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=dark&period=daily&t=1742433071161" alt="Arch - Build fast, hyper-personalized agents with intelligent infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
|
||||
|
||||
`Arch <https://github.com/katanemo/arch>`_ is a smart edge and AI gateway for AI-native apps - one that is natively designed to handle and process prompts, not just network traffic.
|
||||
`Arch <https://github.com/katanemo/arch>`_ is a models-native edge and LLM proxy/gateway for AI agents - one that is natively designed to handle and process prompts, not just network traffic.
|
||||
|
||||
Built by contributors to the widely adopted `Envoy Proxy <https://www.envoyproxy.io/>`_, Arch handles the *pesky low-level work* in building agentic apps — like applying guardrails, clarifying vague user input, routing prompts to the right agent, and unifying access to any LLM. It’s a language and framework friendly infrastructure layer designed to help you build and ship agentic apps faster.
|
||||
|
||||
|
|
@ -73,4 +68,5 @@ Built by contributors to the widely adopted `Envoy Proxy <https://www.envoyproxy
|
|||
:titlesonly:
|
||||
:maxdepth: 2
|
||||
|
||||
resources/deployment
|
||||
resources/configuration_reference
|
||||
|
|
|
|||
121
docs/source/resources/deployment.rst
Normal file
121
docs/source/resources/deployment.rst
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
.. _deployment:
|
||||
|
||||
Deployment
|
||||
==========
|
||||
|
||||
This guide shows how to deploy Arch directly using Docker without the archgw CLI, including basic runtime checks for routing and health monitoring.
|
||||
|
||||
Docker Deployment
|
||||
-----------------
|
||||
|
||||
Below is a minimal, production-ready example showing how to deploy the Arch Docker image directly and run basic runtime checks. Adjust image names, tags, and the ``arch_config.yaml`` path to match your environment.
|
||||
|
||||
.. note::
|
||||
You will need to pass all required environment variables that are referenced in your ``arch_config.yaml`` file.
|
||||
|
||||
For ``arch_config.yaml``, you can use any sample configuration defined earlier in the documentation. For example, you can try the :ref:`LLM Routing <llm_router>` sample config.
|
||||
|
||||
Docker Compose Setup
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Create a ``docker-compose.yml`` file with the following configuration:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
# docker-compose.yml
|
||||
services:
|
||||
archgw:
|
||||
image: katanemo/archgw:0.3.16
|
||||
container_name: archgw
|
||||
ports:
|
||||
- "10000:10000" # ingress (client -> arch)
|
||||
- "12000:12000" # egress (arch -> upstream/llm proxy)
|
||||
volumes:
|
||||
- ./arch_config.yaml:/app/arch_config.yaml:ro
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
|
||||
- MODEL_SERVER_PORT=51000
|
||||
|
||||
Starting the Stack
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Start the services from the directory containing ``docker-compose.yml`` and ``arch_config.yaml``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Set required environment variables and start services
|
||||
OPENAI_API_KEY=xxx ANTHROPIC_API_KEY=yyy docker compose up -d
|
||||
|
||||
Check container health and logs:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
docker compose ps
|
||||
docker compose logs -f archgw
|
||||
|
||||
Runtime Tests
|
||||
-------------
|
||||
|
||||
Perform basic runtime tests to verify routing and functionality.
|
||||
|
||||
Gateway Smoke Test
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Test the chat completion endpoint with automatic routing:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Request handled by the gateway. 'model: "none"' lets Arch decide routing
|
||||
curl --header 'Content-Type: application/json' \
|
||||
--data '{"messages":[{"role":"user","content":"tell me a joke"}], "model":"none"}' \
|
||||
http://localhost:12000/v1/chat/completions | jq .model
|
||||
|
||||
Expected output:
|
||||
|
||||
.. code-block:: json
|
||||
|
||||
"gpt-4o-2024-08-06"
|
||||
|
||||
Model-Based Routing
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Test explicit provider and model routing:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
curl -s -H "Content-Type: application/json" \
|
||||
-d '{"messages":[{"role":"user","content":"Explain quantum computing"}], "model":"anthropic/claude-3-5-sonnet-20241022"}' \
|
||||
http://localhost:12000/v1/chat/completions | jq .model
|
||||
|
||||
Expected output:
|
||||
|
||||
.. code-block:: json
|
||||
|
||||
"claude-3-5-sonnet-20241022"
|
||||
|
||||
Troubleshooting
|
||||
---------------
|
||||
|
||||
Common Issues and Solutions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
**Environment Variables**
|
||||
Ensure all environment variables (``OPENAI_API_KEY``, ``ANTHROPIC_API_KEY``, etc.) used by ``arch_config.yaml`` are set before starting services.
|
||||
|
||||
**TLS/Connection Errors**
|
||||
If you encounter TLS or connection errors to upstream providers:
|
||||
|
||||
- Check DNS resolution
|
||||
- Verify proxy settings
|
||||
- Confirm correct protocol and port in your ``arch_config`` endpoints
|
||||
|
||||
**Verbose Logging**
|
||||
To enable more detailed logs for debugging:
|
||||
|
||||
- Run archgw with a higher component log level
|
||||
- See the :ref:`Observability <observability>` guide for logging and monitoring details
|
||||
- Rebuild the image if required with updated log configuration
|
||||
|
||||
**CI/Automated Checks**
|
||||
For continuous integration or automated testing, you can use the curl commands above as health checks in your deployment pipeline.
|
||||
1162
model_server/poetry.lock
generated
1162
model_server/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "archgw_modelserver"
|
||||
version = "0.3.15"
|
||||
version = "0.3.16"
|
||||
description = "A model server for serving models"
|
||||
authors = ["Katanemo Labs, Inc <info@katanemo.com>"]
|
||||
license = "Apache 2.0"
|
||||
|
|
|
|||
|
|
@ -501,7 +501,7 @@ def test_anthropic_client_with_coding_model_alias_and_tools():
|
|||
assert text_content or len(tool_use_blocks) > 0
|
||||
|
||||
|
||||
@pytest.mark.flaky(retries=0) # Disable retries to see the actual failure
|
||||
@pytest.mark.skip("flay test - to be fixed")
|
||||
def test_anthropic_client_with_coding_model_alias_and_tools_streaming():
|
||||
"""Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools - streaming"""
|
||||
logger.info(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue