better model names (#517)

This commit is contained in:
Adil Hafeez 2025-07-11 16:42:16 -07:00 committed by GitHub
parent 4e2355965b
commit a7fddf30f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 979 additions and 483 deletions

40
.github/workflows/arch_tools_tests.yml vendored Normal file
View file

@ -0,0 +1,40 @@
name: arch tools tests
permissions:
contents: read
on:
push:
branches:
- main
pull_request:
jobs:
arch_tools_tests:
runs-on: ubuntu-latest-m
defaults:
run:
working-directory: ./arch/tools
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.12"
- name: install poetry
run: |
export POETRY_VERSION=1.8.5
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.local/bin:$PATH"
- name: install arch tools
run: |
poetry install
- name: run tests
run: |
poetry run pytest

5
.gitignore vendored
View file

@ -102,13 +102,10 @@ venv.bak/
# mypy
.mypy_cache/
# VSCode stuff:
.vscode/
# MacOS Metadata
*.DS_Store
*.yaml_rendered
# =========================================

View file

@ -104,10 +104,8 @@ listeners:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
system_prompt: |
You are a helpful assistant.
@ -204,16 +202,12 @@ listeners:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
- name: mistral-3b
access_key: $MISTRAL_API_KEY
provider: openai
model: mistral-3b-latest
- access_key: $MISTRAL_API_KEY
model: mistral/mistral-3b-latest
```
#### Preference-based Routing
@ -230,17 +224,18 @@ listeners:
timeout: 30s
llm_providers:
- name: code_generation
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4.1
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
default: true
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
- name: code_understanding
provider_interface: openai
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
usage: understand and explain existing code snippets, functions, or libraries
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
```
Arch uses a lightweight 1.5B autoregressive model to map prompts (and conversation context) to these policies. This approach adapts to intent drift, supports multi-turn conversations, and avoids the brittleness of embedding-based classifiers or manual if/else chains. No retraining is required when adding new models or updating policies — routing is governed entirely by human-readable rules. You can learn more about the design, benchmarks, and methodology behind preference-based routing in our paper:
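As an illustration (not part of this commit), here is a minimal sketch of how a client might exercise preference-based routing against the egress listener from the config above. The port (12000), the OpenAI-compatible `/v1/chat/completions` surface, the `openai` Python client, and the `model: "none"` convention are assumptions taken from the curl and hurl examples elsewhere in this repository; the placeholder API key is hypothetical, since the gateway holds the real provider access keys.

```python
# Minimal sketch: send a prompt through the Arch egress listener and let the
# preference-based router choose the provider based on routing_preferences.
# Assumptions: gateway listening on localhost:12000 with an OpenAI-compatible API.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:12000/v1",
    api_key="unused",  # placeholder; provider access keys live in arch_config.yaml
)

resp = client.chat.completions.create(
    model="none",  # no explicit model: the router maps the prompt to a routing preference
    messages=[
        {"role": "user", "content": "Write a Python function that parses a CSV file."}
    ],
)

# The gateway forwards the request to the provider whose routing preference
# (e.g. "code generation") best matches the prompt; resp.model reflects that choice.
print(resp.model)
```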

View file

@ -66,11 +66,16 @@ properties:
properties:
name:
type: string
# provider field is deprecated, use provider_interface instead
provider:
access_key:
type: string
model:
type: string
default:
type: boolean
base_url:
type: string
http_host:
type: string
enum:
- openai
provider_interface:
type: string
enum:
@ -81,29 +86,22 @@ properties:
- mistral
- openai
- gemini
access_key:
type: string
model:
type: string
default:
type: boolean
# endpoint field is deprecated, use base_url instead
endpoint:
type: string
base_url:
type: string
protocol:
type: string
enum:
- http
- https
http_host:
type: string
usage:
type: string
routing_preferences:
type: array
items:
type: object
properties:
name:
type: string
description:
type: string
additionalProperties: false
required:
- name
- description
additionalProperties: false
required:
- name
- model
overrides:
type: object
properties:

View file

@ -5,16 +5,16 @@ import yaml
from jsonschema import validate
from urllib.parse import urlparse
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
)
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
)
ARCH_CONFIG_SCHEMA_FILE = os.getenv(
"ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
)
SUPPORTED_PROVIDERS = [
"arch",
"claude",
"deepseek",
"groq",
"mistral",
"openai",
"gemini",
]
def get_endpoint_and_port(endpoint, protocol):
@ -32,8 +32,22 @@ def get_endpoint_and_port(endpoint, protocol):
def validate_and_render_schema():
env = Environment(loader=FileSystemLoader("./"))
template = env.get_template("envoy.template.yaml")
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
)
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
ARCH_CONFIG_FILE_RENDERED = os.getenv(
"ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml"
)
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
)
ARCH_CONFIG_SCHEMA_FILE = os.getenv(
"ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
)
env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./")))
template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)
try:
validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE)
@ -82,6 +96,8 @@ def validate_and_render_schema():
updated_llm_providers = []
llm_provider_name_set = set()
llms_with_usage = []
model_name_keys = set()
model_usage_name_keys = set()
for llm_provider in config_yaml["llm_providers"]:
if llm_provider.get("usage", None):
llms_with_usage.append(llm_provider["name"])
@ -89,10 +105,52 @@ def validate_and_render_schema():
raise Exception(
f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
)
if llm_provider.get("name") is None:
model_name = llm_provider.get("model")
if model_name in model_name_keys:
raise Exception(
f"llm_provider name is required, please provide name for llm_provider"
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
)
model_name_keys.add(model_name)
if llm_provider.get("name") is None:
llm_provider["name"] = model_name
model_name_tokens = model_name.split("/")
if len(model_name_tokens) < 2:
raise Exception(
f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
)
provider = model_name_tokens[0]
model_id = "/".join(model_name_tokens[1:])
if provider not in SUPPORTED_PROVIDERS:
if (
llm_provider.get("base_url", None) is None
or llm_provider.get("provider_interface", None) is None
):
raise Exception(
f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
)
provider = llm_provider.get("provider_interface", None)
elif llm_provider.get("provider_interface", None) is not None:
raise Exception(
f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' "
)
if model_id in model_name_keys:
raise Exception(
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
)
model_name_keys.add(model_id)
for routing_preference in llm_provider.get("routing_preferences", []):
if routing_preference.get("name") in model_usage_name_keys:
raise Exception(
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
)
model_usage_name_keys.add(routing_preference.get("name"))
llm_provider["model"] = model_id
llm_provider["provider_interface"] = provider
llm_provider_name_set.add(llm_provider.get("name"))
provider = None
if llm_provider.get("provider") and llm_provider.get("provider_interface"):
@ -105,21 +163,14 @@ def validate_and_render_schema():
del llm_provider["provider"]
updated_llm_providers.append(llm_provider)
if llm_provider.get("endpoint") and llm_provider.get("base_url"):
raise Exception("Please provide either endpoint or base_url, not both")
if llm_provider.get("endpoint", None):
endpoint = llm_provider["endpoint"]
protocol = llm_provider.get("protocol", "http")
llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port(
endpoint, protocol
)
llms_with_endpoint.append(llm_provider)
elif llm_provider.get("base_url", None):
if llm_provider.get("base_url", None):
base_url = llm_provider["base_url"]
urlparse_result = urlparse(base_url)
if llm_provider.get("port"):
raise Exception("Please provider port in base_url")
url_path = urlparse_result.path
if url_path and url_path != "/":
raise Exception(
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
)
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
"http",
"https",
@ -140,7 +191,7 @@ def validate_and_render_schema():
llm_provider["protocol"] = protocol
llms_with_endpoint.append(llm_provider)
if len(llms_with_usage) > 0:
if len(model_usage_name_keys) > 0:
routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None)
if routing_llm_provider and routing_llm_provider not in llm_provider_name_set:
raise Exception(
@ -198,6 +249,7 @@ def validate_and_render_schema():
agent_orchestrator = list(endpoints.keys())[0]
print("agent_orchestrator: ", agent_orchestrator)
data = {
"prompt_gateway_listener": prompt_gateway_listener,
"llm_gateway_listener": llm_gateway_listener,
@ -216,6 +268,9 @@ def validate_and_render_schema():
with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
file.write(rendered)
with open(ARCH_CONFIG_FILE_RENDERED, "w") as file:
file.write(arch_config_string)
def validate_prompt_config(arch_config_file, arch_config_schema_file):
with open(arch_config_file, "r") as file:
@ -231,7 +286,7 @@ def validate_prompt_config(arch_config_file, arch_config_schema_file):
validate(config_yaml, config_schema_yaml)
except Exception as e:
print(
f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e.message}"
f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e}"
)
raise e

134
arch/tools/poetry.lock generated
View file

@ -57,6 +57,34 @@ files = [
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "exceptiongroup"
version = "1.3.0"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"},
{file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"},
]
[package.dependencies]
typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""}
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "iniconfig"
version = "2.1.0"
description = "brain-dead simple config-ini parsing"
optional = false
python-versions = ">=3.8"
files = [
{file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
{file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
]
[[package]]
name = "jinja2"
version = "3.1.6"
@ -179,6 +207,69 @@ files = [
{file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
]
[[package]]
name = "packaging"
version = "25.0"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
files = [
{file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"},
{file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"},
]
[[package]]
name = "pluggy"
version = "1.6.0"
description = "plugin and hook calling mechanisms for python"
optional = false
python-versions = ">=3.9"
files = [
{file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
{file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
]
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["coverage", "pytest", "pytest-benchmark"]
[[package]]
name = "pygments"
version = "2.19.2"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.8"
files = [
{file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
{file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
]
[package.extras]
windows-terminal = ["colorama (>=0.4.6)"]
[[package]]
name = "pytest"
version = "8.4.1"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.9"
files = [
{file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"},
{file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"},
]
[package.dependencies]
colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""}
iniconfig = ">=1"
packaging = ">=20"
pluggy = ">=1.5,<2"
pygments = ">=2.7.2"
tomli = {version = ">=1", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
[[package]]
name = "pyyaml"
version = "6.0.2"
@ -430,6 +521,47 @@ enabler = ["pytest-enabler (>=2.2)"]
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"]
[[package]]
name = "tomli"
version = "2.2.1"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
{file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
{file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
{file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
{file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
{file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
{file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
{file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
{file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
{file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
{file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
{file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
{file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
]
[[package]]
name = "typing-extensions"
version = "4.14.1"
@ -444,4 +576,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "7d17c2f24f8ad4503a5248c3f7e1a74f458a5ea55a2fb63db382cb6abe6d52df"
content-hash = "83d32fa807f6c7058ecbfc43b777c4d4c637695025cf774ff10532bff8f6712b"

View file

@ -20,6 +20,13 @@ pyyaml = "^6.0.2"
[tool.poetry.scripts]
archgw = "cli.main:main"
[tool.poetry.group.dev.dependencies]
pytest = "^8.4.1"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
addopts = ["-v"]

View file

@ -1,45 +0,0 @@
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Dict, Set
app = FastAPI()
class User(BaseModel):
name: str = Field(
"John Doe", description="The name of the user."
) # Default value and description for name
location: int = None
age: int = Field(
30, description="The age of the user."
) # Default value and description for age
tags: Set[str] = Field(
default_factory=set, description="A set of tags associated with the user."
) # Default empty set and description for tags
metadata: Dict[str, int] = Field(
default_factory=dict,
description="A dictionary storing metadata about the user, with string keys and integer values.",
) # Default empty dict and description for metadata
@app.get("/agent/default")
async def default(request: User):
"""
This endpoint handles information extraction queries.
It can summarize, extract details, and perform various other information-related tasks.
"""
return {"info": f"Query: {request.name}, Count: {request.age}"}
@app.post("/agent/action")
async def reboot_network_device(device_id: str, confirmation: str):
"""
This endpoint reboots a network device based on the device ID.
Confirmation is required to proceed with the reboot.
Args:
device_id: The device_id that you want to reboot.
confirmation: The confirmation that the user wants to reboot.
metadata: Ignore this parameter
"""
return {"status": "Device rebooted", "device_id": device_id}

View file

@ -1,33 +0,0 @@
prompt_targets:
- name: default
path: /agent/default
description: "This endpoint handles information extraction queries.\n It can\
\ summarize, extract details, and perform various other information-related tasks."
parameters:
- name: query
type: str
description: Field from Pydantic model DefaultRequest
default_value: null
required: false
- name: count
type: int
description: Field from Pydantic model DefaultRequest
default_value: null
required: false
type: default
auto-llm-dispatch-on-response: true
- name: reboot_network_device
path: /agent/action
description: "This endpoint reboots a network device based on the device ID.\n \
\ Confirmation is required to proceed with the reboot."
parameters:
- name: device_id
type: str
description: Description for device_id
default_value: ''
required: true
- name: confirmation
type: int
description: Description for confirmation
default_value: ''
required: true

View file

@ -1,42 +0,0 @@
import pytest
from click.testing import CliRunner
from tools.cli.main import main # Import your CLI's entry point
import importlib.metadata
def get_version():
"""Helper function to fetch the version."""
try:
version = importlib.metadata.version("archgw")
return version
except importlib.metadata.PackageNotFoundError:
return None
@pytest.fixture
def runner():
"""Fixture to create a Click test runner."""
return CliRunner()
def test_version_option(runner):
"""Test the --version option."""
result = runner.invoke(main, ["--version"])
assert result.exit_code == 0
expected_version = get_version()
assert f"archgw cli version: {expected_version}" in result.output
def test_default_behavior(runner):
"""Test the default behavior when no command is provided."""
result = runner.invoke(main)
assert result.exit_code == 0
assert "Arch (The Intelligent Prompt Gateway) CLI" in result.output
assert "Usage:" in result.output # Ensure help text is shown
def test_invalid_command(runner):
"""Test that an invalid command returns an appropriate error message."""
result = runner.invoke(main, ["invalid_command"])
assert result.exit_code != 0 # Non-zero exit code for invalid command
assert "Error: No such command 'invalid_command'" in result.output

View file

@ -0,0 +1,272 @@
import pytest
from unittest import mock
import sys
from cli.config_generator import validate_and_render_schema
# Patch sys.path to allow import from cli/
import os
sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "cli"))
)
@pytest.fixture(autouse=True)
def cleanup_env(monkeypatch):
# Clean up environment variables and mocks after each test
yield
monkeypatch.undo()
def test_validate_and_render_happy_path(monkeypatch):
monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml")
monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml")
monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml")
monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml")
monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml")
monkeypatch.setenv("TEMPLATE_ROOT", "../")
arch_config = """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
tracing:
random_sampling: 100
"""
arch_config_schema = ""
with open("../arch_config_schema.yaml", "r") as file:
arch_config_schema = file.read()
m_open = mock.mock_open()
# Provide enough file handles for all open() calls in validate_and_render_schema
m_open.side_effect = [
mock.mock_open(read_data="").return_value,
mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE
mock.mock_open(
read_data=arch_config_schema
).return_value, # ARCH_CONFIG_SCHEMA_FILE
mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE
mock.mock_open(
read_data=arch_config_schema
).return_value, # ARCH_CONFIG_SCHEMA_FILE
mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write)
mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write)
]
with mock.patch("builtins.open", m_open):
with mock.patch("config_generator.Environment"):
validate_and_render_schema()
arch_config_test_cases = [
{
"id": "duplicate_provider_name",
"expected_error": "Duplicate llm_provider name",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- name: test1
model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- name: test1
model: openai/gpt-4o
access_key: $OPENAI_API_KEY
""",
},
{
"id": "provider_interface_with_model_id",
"expected_error": "Please provide provider interface as part of model name",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
""",
},
{
"id": "duplicate_model_id",
"expected_error": "Duplicate model_id",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: mistral/gpt-4o
""",
},
{
"id": "custom_provider_base_url",
"expected_error": "Must provide base_url and provider_interface",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: custom/gpt-4o
""",
},
{
"id": "base_url_no_prefix",
"expected_error": "Please provide base_url without path",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: custom/gpt-4o
base_url: "http://custom.com/test"
provider_interface: openai
""",
},
{
"id": "duplicate_routeing_preference_name",
"expected_error": "Duplicate routing preference name",
"arch_config": """
version: v0.1.0
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
tracing:
random_sampling: 100
""",
},
]
@pytest.mark.parametrize(
"arch_config_test_case",
arch_config_test_cases,
ids=[case["id"] for case in arch_config_test_cases],
)
def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case):
monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml")
monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml")
monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml")
monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml")
monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml")
monkeypatch.setenv("TEMPLATE_ROOT", "../")
arch_config = arch_config_test_case["arch_config"]
expected_error = arch_config_test_case["expected_error"]
test_id = arch_config_test_case["id"]
arch_config_schema = ""
with open("../arch_config_schema.yaml", "r") as file:
arch_config_schema = file.read()
m_open = mock.mock_open()
# Provide enough file handles for all open() calls in validate_and_render_schema
m_open.side_effect = [
mock.mock_open(read_data="").return_value,
mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE
mock.mock_open(
read_data=arch_config_schema
).return_value, # ARCH_CONFIG_SCHEMA_FILE
mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE
mock.mock_open(
read_data=arch_config_schema
).return_value, # ARCH_CONFIG_SCHEMA_FILE
mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write)
mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write)
]
with mock.patch("builtins.open", m_open):
with mock.patch("config_generator.Environment"):
with pytest.raises(Exception) as excinfo:
validate_and_render_schema()
assert expected_error in str(excinfo.value)

View file

@ -3,11 +3,22 @@
failed_files=()
for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do
echo "Validating $file..."
if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
echo "Validating ${file}..."
touch $(pwd)/${file}_rendered
if ! docker run --rm -v "$(pwd)/${file}:/app/arch_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/arch_config_rendered.yaml:rw" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
echo "Validation failed for $file"
failed_files+=("$file")
fi
RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/')
if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then
echo "Checking rendered file against checked-in version..."
if ! diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then
echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}"
failed_files+=("${file}_rendered")
else
echo "Rendered file matches checked-in version."
fi
fi
done
# Print summary of failed files

21
crates/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,21 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Debug Brightstaff",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/target/debug/brightstaff",
"args": [],
"cwd": "${workspaceFolder}",
"stopOnEntry": false,
"sourceLanguages": ["rust"],
"env": {
"RUST_LOG": "debug",
"RUST_BACKTRACE": "1",
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
},
"preLaunchTask": "rust: cargo build"
}
]
}

21
crates/.vscode/tasks.json vendored Normal file
View file

@ -0,0 +1,21 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "cargo",
"command": "build",
"args": [
"--bin",
"brightstaff"
],
"problemMatcher": [
"$rustc"
],
"group": {
"kind": "build",
"isDefault": true
},
"label": "rust: cargo build"
}
]
}

View file

@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::{debug, info, trace, warn};
use tracing::{debug, info, warn};
use crate::router::llm_router::RouterService;
@ -81,8 +81,8 @@ pub async fn chat_completions(
}
}
trace!(
"arch-router request body: {}",
debug!(
"arch-router request received: {}",
&serde_json::to_string(&chat_completion_request).unwrap()
);
@ -102,9 +102,9 @@ pub async fn chat_completions(
.as_ref()
.and_then(|s| serde_yaml::from_str(s).ok());
debug!("usage preferences: {:?}", usage_preferences);
debug!("usage preferences from request: {:?}", usage_preferences);
let mut selected_llm = match router_service
let mut determined_route = match router_service
.determine_route(
&chat_completion_request.messages,
trace_parent.clone(),
@ -121,14 +121,14 @@ pub async fn chat_completions(
}
};
if selected_llm.is_none() {
if determined_route.is_none() {
debug!("No LLM model selected, using default from request");
selected_llm = Some(chat_completion_request.model.clone());
determined_route = Some(chat_completion_request.model.clone());
}
info!(
"sending request to llm provider: {} with llm model: {:?}",
llm_provider_endpoint, selected_llm
llm_provider_endpoint, determined_route
);
if let Some(trace_parent) = trace_parent {
@ -138,10 +138,10 @@ pub async fn chat_completions(
);
}
if let Some(selected_llm) = selected_llm {
if let Some(selected_route) = determined_route {
request_headers.insert(
ARCH_PROVIDER_HINT_HEADER,
header::HeaderValue::from_str(&selected_llm).unwrap(),
header::HeaderValue::from_str(&selected_route).unwrap(),
);
}

View file

@ -44,9 +44,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _tracer_provider = init_tracer();
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
info!(
"current working directory: {}",
env::current_dir().unwrap().display()
);
// loading arch_config.yaml file
let arch_config_path =
env::var("ARCH_CONFIG_PATH").unwrap_or_else(|_| "./arch_config.yaml".to_string());
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
info!("Loading arch_config.yaml from {}", arch_config_path);
let config_contents =

View file

@ -1,7 +1,7 @@
use std::{collections::HashMap, sync::Arc};
use std::sync::Arc;
use common::{
configuration::{LlmProvider, LlmRoute, ModelUsagePreference},
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
consts::ARCH_PROVIDER_HINT_HEADER,
};
use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message};
@ -19,7 +19,6 @@ pub struct RouterService {
router_model: Arc<dyn RouterModel>,
routing_provider_name: String,
llm_usage_defined: bool,
llm_provider_map: HashMap<String, LlmProvider>,
}
#[derive(Debug, Error)]
@ -45,11 +44,14 @@ impl RouterService {
) -> Self {
let providers_with_usage = providers
.iter()
.filter(|provider| provider.usage.is_some())
.filter(|provider| provider.routing_preferences.is_some())
.cloned()
.collect::<Vec<LlmProvider>>();
let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect();
let llm_routes: Vec<RoutingPreference> = providers_with_usage
.iter()
.flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default())
.collect();
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
llm_routes,
@ -57,18 +59,12 @@ impl RouterService {
router_model_v1::MAX_TOKEN_LEN,
));
let llm_provider_map: HashMap<String, LlmProvider> = providers
.into_iter()
.map(|provider| (provider.name.clone(), provider))
.collect();
RouterService {
router_url,
client: reqwest::Client::new(),
router_model,
routing_provider_name,
llm_usage_defined: !providers_with_usage.is_empty(),
llm_provider_map,
}
}
@ -155,40 +151,21 @@ impl RouterService {
if let Some(ContentType::Text(content)) =
&chat_completion_response.choices[0].message.content
{
let mut selected_model: Option<String> = None;
if let Some(selected_llm_name) = self.router_model.parse_response(content)? {
if selected_llm_name != "other" {
if let Some(usage_preferences) = usage_preferences {
for usage in usage_preferences {
if usage.name == selected_llm_name {
selected_model = Some(usage.model);
break;
}
}
if selected_model.is_none() {
warn!(
"Selected LLM model not found in usage preferences: {}",
selected_llm_name
);
}
} else if let Some(provider) = self.llm_provider_map.get(&selected_llm_name) {
selected_model = provider.model.clone();
} else {
warn!(
"Selected LLM model not found in provider map: {}",
selected_llm_name
);
}
}
}
let route_name = self.router_model.parse_response(content)?;
info!(
"router response: {}, selected_model: {:?}, response time: {}ms",
content.replace("\n", "\\n"),
selected_model,
route_name,
router_response_time.as_millis()
);
Ok(selected_model)
if let Some(ref route) = route_name {
if route == "other" {
return Ok(None);
}
}
Ok(route_name)
} else {
Ok(None)
}

View file

@ -1,5 +1,5 @@
use common::{
configuration::{LlmRoute, ModelUsagePreference},
configuration::{ModelUsagePreference, RoutingPreference},
consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
};
use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message};
@ -36,7 +36,11 @@ pub struct RouterModelV1 {
max_token_length: usize,
}
impl RouterModelV1 {
pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self {
pub fn new(
llm_routes: Vec<RoutingPreference>,
routing_model: String,
max_token_length: usize,
) -> Self {
let llm_route_json_str =
serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
RouterModelV1 {
@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 {
let llm_route_json = usage_preferences
.as_ref()
.map(|prefs| {
let llm_route: Vec<LlmRoute> = prefs
let llm_route: Vec<RoutingPreference> = prefs
.iter()
.map(|pref| LlmRoute {
.map(|pref| RoutingPreference {
name: pref.name.clone(),
description: pref.usage.clone().unwrap_or_default(),
})
@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let routing_model = "test-model".to_string();
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
]
"#;
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);

View file

@ -187,24 +187,11 @@ pub struct ModelUsagePreference {
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmRoute {
pub struct RoutingPreference {
pub name: String,
pub description: String,
}
impl From<&LlmProvider> for LlmRoute {
fn from(provider: &LlmProvider) -> Self {
Self {
name: provider.name.to_string(),
description: provider
.usage
.as_ref()
.cloned()
.unwrap_or_else(|| "No description available".to_string()),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
//TODO: use enum for model, but if there is a new model, we need to update the code
pub struct LlmProvider {
@ -218,6 +205,7 @@ pub struct LlmProvider {
pub port: Option<u16>,
pub rate_limits: Option<LlmRatelimit>,
pub usage: Option<String>,
pub routing_preferences: Option<Vec<RoutingPreference>>,
}
pub trait IntoModels {
@ -256,6 +244,7 @@ impl Default for LlmProvider {
port: None,
rate_limits: None,
usage: None,
routing_preferences: None,
}
}
}
@ -368,7 +357,7 @@ mod test {
#[test]
fn test_deserialize_configuration() {
let ref_config = fs::read_to_string(
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
)
.expect("reference config file not found");
@ -429,7 +418,7 @@ mod test {
#[test]
fn test_tool_conversion() {
let ref_config = fs::read_to_string(
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
)
.expect("reference config file not found");
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();

View file

@ -58,7 +58,16 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
let name = llm_provider.name.clone();
if llm_providers
.providers
.insert(name.clone(), llm_provider)
.insert(name.clone(), llm_provider.clone())
.is_some()
{
return Err(LlmProvidersNewError::DuplicateName(name));
}
// also add model_id as key for provider lookup
if llm_providers
.providers
.insert(llm_provider.model.clone().unwrap(), llm_provider)
.is_some()
{
return Err(LlmProvidersNewError::DuplicateName(name));

View file

@ -113,16 +113,10 @@ impl StreamContext {
}
debug!(
"request received: llm provider hint: {}, selected llm: {}, model: {}",
"request received: llm provider hint: {}, selected provider: {}",
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
.unwrap_or_default(),
self.llm_provider.as_ref().unwrap().name,
self.llm_provider
.as_ref()
.unwrap()
.model
.as_ref()
.unwrap_or(&String::new())
self.llm_provider.as_ref().unwrap().name
);
}
@ -349,7 +343,7 @@ impl HttpContext for StreamContext {
};
info!(
"on_http_request_body: provider: {}, model requested: {}, model selected: {}",
"on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}",
self.llm_provider().name,
model_requested,
model_name.unwrap_or(&"None".to_string()),

View file

@ -30,7 +30,10 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some("x-arch-llm-provider-hint"),
)
.returning(None)
.expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4"))
.expect_log(
Some(LogLevel::Debug),
Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
)
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider"),
@ -263,7 +266,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(incomplete_chat_completions_request_body))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
.expect_send_local_response(
Some(StatusCode::BAD_REQUEST.as_u16().into()),
None,
@ -429,7 +432,7 @@ fn llm_gateway_override_model_name() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_record("input_sequence_length", 29)
@ -478,7 +481,7 @@ fn llm_gateway_override_use_default_model() {
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(
Some(LogLevel::Info),
Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"),
Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
@ -526,7 +529,7 @@ fn llm_gateway_override_use_model_name_none() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: none, model selected: gpt-4"))
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_record("input_sequence_length", 29)

View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "java",
"name": "WeatherForecastApplication",
"request": "launch",
"mainClass": "weather.WeatherForecastApplication",
"projectName": "weather-forecast-service"
}
]
}

View file

@ -9,10 +9,8 @@ listeners:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
default: true
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.

View file

@ -8,10 +8,8 @@ listeners:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
endpoints:
frankfurther_api:

View file

@ -9,10 +9,8 @@ listeners:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
default: true
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.

View file

@ -13,10 +13,8 @@ endpoints:
connect_timeout: 0.005s
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
default: true
system_prompt: |

View file

@ -8,10 +8,8 @@ listeners:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
# default system prompt used by all prompt targets

View file

@ -8,10 +8,8 @@ listeners:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
endpoints:
twelvedata_api:

View file

@ -17,15 +17,11 @@ overrides:
prompt_target_intent_matching_threshold: 0.6
llm_providers:
- name: groq
access_key: $GROQ_API_KEY
provider_interface: groq
model: llama-3.2-3b-preview
- access_key: $GROQ_API_KEY
model: groq/llama-3.2-3b-preview
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
system_prompt: |

View file

@ -13,16 +13,12 @@ listeners:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider: openai
model: ministral-3b-latest
- access_key: $MISTRAL_API_KEY
model: mistral/ministral-3b-latest
```
### Step 2. Start arch gateway

View file

@ -18,7 +18,7 @@ You can also pass in a header to override model when sending prompt. Following e
```bash
$ curl --header 'Content-Type: application/json' \
--header 'x-arch-llm-provider-hint: ministral-3b' \
--header 'x-arch-llm-provider-hint: mistral/ministral-3b' \
--data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \
http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
{

View file

@ -9,46 +9,34 @@ listeners:
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider_interface: mistral
model: ministral-3b-latest
- access_key: $MISTRAL_API_KEY
model: mistral/ministral-3b-latest
- name: claude-sonnet
access_key: $ANTHROPIC_API_KEY
provider_interface: claude
model: claude-3-7-sonnet-latest
- access_key: $ANTHROPIC_API_KEY
model: claude/claude-3-7-sonnet-latest
- name: claude-sonnet-4
access_key: $ANTHROPIC_API_KEY
provider_interface: claude
model: claude-sonnet-4-0
- access_key: $ANTHROPIC_API_KEY
model: claude/claude-sonnet-4-0
- name: deepseek
access_key: $DEEPSEEK_API_KEY
provider_interface: deepseek
model: deepseek-reasoner
- access_key: $DEEPSEEK_API_KEY
model: deepseek/deepseek-reasoner
- name: groq
access_key: $GROQ_API_KEY
provider_interface: groq
model: llama-3.1-8b-instant
- access_key: $GROQ_API_KEY
model: groq/llama-3.1-8b-instant
- name: gemini
access_key: $GEMINI_API_KEY
provider_interface: gemini
model: gemini-1.5-pro-latest
- access_key: $GEMINI_API_KEY
model: gemini/gemini-1.5-pro-latest
- model: custom/test-model
base_url: http://host.docker.internal:11223
provider_interface: openai
tracing:
random_sampling: 100

View file

@ -9,10 +9,9 @@ listeners:
llm_providers:
- name: local-llama
- model: my_llm_provider/llama3.2
provider_interface: openai
model: llama3.2
endpoint: host.docker.internal:11434
base_url: http://host.docker.internal:11434
default: true
system_prompt: |

View file

@ -22,10 +22,8 @@ endpoints:
connect_timeout: 0.005s
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
default: true
system_prompt: |

View file

@ -9,28 +9,21 @@ listeners:
llm_providers:
- name: gpt-4o-mini
provider_interface: openai
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
- name: gpt-4.1
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4.1
default: true
- name: code_generation
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4.1
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- name: code_understanding
provider_interface: openai
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
usage: understand and explain existing code snippets, functions, or libraries
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
tracing:
random_sampling: 100

View file

@ -14,32 +14,24 @@ listeners:
llm_providers:
- name: arch-router
provider_interface: arch
model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
endpoint: host.docker.internal:11434
model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
base_url: http://host.docker.internal:11434
- name: gpt-4o-mini
provider_interface: openai
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
- name: gpt-4.1
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4.1
default: true
- name: code_generation
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4.1
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- name: code_understanding
provider_interface: openai
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
model: gpt-4.1
usage: understand and explain existing code snippets, functions, or libraries
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
tracing:
random_sampling: 100

View file

@ -2,13 +2,13 @@ POST http://localhost:12000/v1/chat/completions
Content-Type: application/json
{
"model": "openai/gpt-4.1",
"messages": [
{
"role": "user",
"content": "hi"
}
],
"model": "none"
]
}
HTTP 200
[Asserts]

View file

@ -14,4 +14,4 @@ Content-Type: application/json
HTTP 200
[Asserts]
header "content-type" matches /text\/event-stream/
body matches /^data: .*?gpt-4.1.*?\n/
body matches /^data: .*?gpt-4o-mini.*?\n/

View file

@ -85,10 +85,8 @@ system_prompt: |
Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks!
llm_providers:
- name: openai
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
prompt_targets:

View file

@ -9,10 +9,8 @@ listeners:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
# default system prompt used by all prompt targets

View file

@ -50,10 +50,8 @@ Create ``arch_config.yaml`` file with the following content:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
system_prompt: |
You are a helpful assistant.
@ -153,16 +151,12 @@ Create ``arch_config.yaml`` file with the following content:
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider_interface: openai
model: ministral-3b-latest
- access_key: $MISTRAL_API_KEY
model: mistral/ministral-3b-latest
Step 2. Start arch gateway
~~~~~~~~~~~~~~~~~~~~~~~~~~
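A minimal sketch of this step, assuming the archgw CLI is installed and the config above was saved as arch_config.yaml (the exact invocation may differ):

# hypothetical invocation; point the CLI at the config created in Step 1
archgw up arch_config.yaml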

View file

@ -9,10 +9,8 @@ listeners:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
default: true
# default system prompt used by all prompt targets

View file

@ -74,9 +74,6 @@ Below is an example to show how to set up a prompt target for the Arch Router:
:caption: Route Config Example
routing:
model: archgw-v1-router-model
listeners:
egress_traffic:
address: 0.0.0.0
@ -85,29 +82,22 @@ Below is an example to show how to set up a prompt target for the Arch Router:
timeout: 30s
llm_providers:
- name: archgw-v1-router-model
provider_interface: openai
model: katanemo/Arch-Router-1.5B
base_url: ...
- name: gpt-4o-mini
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
default: true
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- name: code_generation
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements
- name: code_understanding
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4.1
usage: understand and explain existing code snippets, functions, or libraries
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code understanding
description: understand and explain existing code snippets, functions, or libraries
- model: openai/gpt-4.1
access_key: $OPENAI_API_KEY
routing_preferences:
- name: code generation
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
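To illustrate the routing behavior, a request like the sketch below (assuming the egress listener above is reachable on localhost:12000, as in the earlier examples) would match the "code generation" preference and be dispatched to openai/gpt-4.1:

# hypothetical request; with preference-based routing the gateway selects the target provider,
# so the "model" value here is only a placeholder
curl -s http://localhost:12000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "none",
        "messages": [
          {"role": "user", "content": "Write a Python function that deduplicates a list"}
        ]
      }'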
Example Use Cases
-------------------------

View file

@ -30,21 +30,16 @@ endpoints:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
- name: openai/gpt-4o
access_key: $OPENAI_API_KEY
model: gpt-4o
model: openai/gpt-4o
default: true
- name: Mistral8x7b
provider_interface: openai
access_key: $MISTRAL_API_KEY
model: mistral-8x7b
- access_key: $MISTRAL_API_KEY
model: mistral/mistral-8x7b
- name: MistralLocal7b
provider_interface: openai
model: mistral-7b-instruct
endpoint: mistral_local
- model: mistral/mistral-7b-instruct
base_url: http://mistral_local
# provides a way to override default settings for the arch system
overrides:

View file

@ -0,0 +1,81 @@
endpoints:
app_server:
connect_timeout: 0.005s
endpoint: 127.0.0.1
port: 80
error_target:
endpoint: error_target_1
port: 80
mistral_local:
endpoint: 127.0.0.1
port: 8001
listeners:
egress_traffic:
address: 0.0.0.0
message_format: openai
port: 12000
timeout: 5s
ingress_traffic:
address: 0.0.0.0
message_format: openai
port: 10000
timeout: 5s
llm_providers:
- access_key: $OPENAI_API_KEY
default: true
model: gpt-4o
name: openai/gpt-4o
provider_interface: openai
- access_key: $MISTRAL_API_KEY
model: mistral-8x7b
name: mistral/mistral-8x7b
provider_interface: mistral
- base_url: http://mistral_local
endpoint: mistral_local
model: mistral-7b-instruct
name: mistral/mistral-7b-instruct
port: 80
protocol: http
provider_interface: mistral
overrides:
prompt_target_intent_matching_threshold: 0.6
prompt_guards:
input_guards:
jailbreak:
on_exception:
message: Looks like you're curious about my abilities, but I can only provide
assistance within my programmed parameters.
prompt_targets:
- auto_llm_dispatch_on_response: true
default: true
description: handle all scenarios that are question and answer in nature, like summarization,
information extraction, etc.
endpoint:
http_method: POST
name: app_server
path: /agent/summary
name: information_extraction
system_prompt: You are a helpful information extraction assistant. Use the information
that is provided to you.
- description: Reboot a specific network device
endpoint:
name: app_server
path: /agent/action
name: reboot_network_device
parameters:
- description: Identifier of the network device to reboot.
name: device_id
required: true
type: str
- default: false
description: Confirmation flag to proceed with reboot.
enum:
- true
- false
name: confirmation
type: bool
system_prompt: You are a network assistant that just offers facts, not advice on manufacturers
or purchasing decisions.
tracing:
sampling_rate: 0.1
version: v0.1

View file

@ -4,6 +4,7 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "model server",
"type": "debugpy",

7
model_server/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}

15
tests/archgw/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

7
tests/archgw/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}

View file

@ -13,21 +13,15 @@ endpoints:
connect_timeout: 0.005s
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o-mini
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o-mini
default: true
- name: gpt-3.5-turbo-0125
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-3.5-turbo-0125
- access_key: $OPENAI_API_KEY
model: openai/gpt-3.5-turbo-0125
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
- access_key: $OPENAI_API_KEY
model: openai/gpt-4o
system_prompt: |
You are a helpful assistant.

15
tests/e2e/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

7
tests/e2e/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}

15
tests/modelserver/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

View file

@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}