diff --git a/.github/workflows/arch_tools_tests.yml b/.github/workflows/arch_tools_tests.yml new file mode 100644 index 00000000..da845b20 --- /dev/null +++ b/.github/workflows/arch_tools_tests.yml @@ -0,0 +1,40 @@ +name: arch tools tests + +permissions: + contents: read + +on: + push: + branches: + - main + pull_request: + +jobs: + arch_tools_tests: + runs-on: ubuntu-latest-m + defaults: + run: + working-directory: ./arch/tools + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.12" + + - name: install poetry + run: | + export POETRY_VERSION=1.8.5 + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: install arch tools + run: | + poetry install + + - name: run tests + run: | + poetry run pytest diff --git a/.gitignore b/.gitignore index dc20274c..f155cd1a 100644 --- a/.gitignore +++ b/.gitignore @@ -102,13 +102,10 @@ venv.bak/ # mypy .mypy_cache/ -# VSCode stuff: -.vscode/ - # MacOS Metadata *.DS_Store - +*.yaml_rendered # ========================================= diff --git a/README.md b/README.md index d87b2737..836f5315 100644 --- a/README.md +++ b/README.md @@ -104,10 +104,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. @@ -204,16 +202,12 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: mistral-3b - access_key: $MISTRAL_API_KEY - provider: openai - model: mistral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/mistral-3b-latest ``` #### Preference-based Routing @@ -230,17 +224,18 @@ listeners: timeout: 30s llm_providers: - - name: code_generation + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + default: true + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries ``` Arch uses a lightweight 1.5B autoregressive model to map prompts (and conversation context) to these policies. This approach adapts to intent drift, supports multi-turn conversations, and avoids the brittleness of embedding-based classifiers or manual if/else chains. No retraining is required when adding new models or updating policies — routing is governed entirely by human-readable rules.
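Because Arch's egress listener speaks the OpenAI chat-completions protocol, any OpenAI-compatible client can exercise these routing preferences directly. A minimal sketch, assuming the gateway runs locally on port 12000 (as in the demo configs) and the `openai` Python package is installed; `model="none"` mirrors the override example in the llm_routing demo and defers model selection to the router:

```python
# Minimal client sketch against a locally running Arch gateway (assumed at
# localhost:12000). The router model, not the client, picks the provider
# whose routing_preferences best match the prompt.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:12000/v1", api_key="unused")  # gateway holds the real keys

resp = client.chat.completions.create(
    model="none",  # defer model selection to Arch's preference-based routing
    messages=[{"role": "user", "content": "Write a function that merges two sorted lists."}],
)
print(resp.choices[0].message.content)
```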
You can learn more about the design, benchmarks, and methodology behind preference-based routing in our paper: diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 4212674c..9e9abac8 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -66,11 +66,16 @@ properties: properties: name: type: string - # provider field is deprecated, use provider_interface instead - provider: + access_key: + type: string + model: + type: string + default: + type: boolean + base_url: + type: string + http_host: type: string - enum: - - openai provider_interface: type: string enum: @@ -81,29 +86,22 @@ properties: - mistral - openai - gemini - access_key: - type: string - model: - type: string - default: - type: boolean - # endpoint field is deprecated, use base_url instead - endpoint: - type: string - base_url: - type: string - protocol: - type: string - enum: - - http - - https - http_host: - type: string - usage: - type: string + routing_preferences: + type: array + items: + type: object + properties: + name: + type: string + description: + type: string + additionalProperties: false + required: + - name + - description additionalProperties: false required: - - name + - model overrides: type: object properties: diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 4f4249fb..8346acc0 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -5,16 +5,16 @@ import yaml from jsonschema import validate from urllib.parse import urlparse -ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( - "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" -) -ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml") -ENVOY_CONFIG_FILE_RENDERED = os.getenv( - "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml" -) -ARCH_CONFIG_SCHEMA_FILE = os.getenv( - "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml" -) + +SUPPORTED_PROVIDERS = [ + "arch", + "claude", + "deepseek", + "groq", + "mistral", + "openai", + "gemini", +] def get_endpoint_and_port(endpoint, protocol): @@ -32,8 +32,22 @@ def get_endpoint_and_port(endpoint, protocol): def validate_and_render_schema(): - env = Environment(loader=FileSystemLoader("./")) - template = env.get_template("envoy.template.yaml") + ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( + "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" + ) + ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml") + ARCH_CONFIG_FILE_RENDERED = os.getenv( + "ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml" + ) + ENVOY_CONFIG_FILE_RENDERED = os.getenv( + "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml" + ) + ARCH_CONFIG_SCHEMA_FILE = os.getenv( + "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml" + ) + + env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./"))) + template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE) try: validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE) @@ -82,6 +96,8 @@ def validate_and_render_schema(): updated_llm_providers = [] llm_provider_name_set = set() llms_with_usage = [] + model_name_keys = set() + model_usage_name_keys = set() for llm_provider in config_yaml["llm_providers"]: if llm_provider.get("usage", None): llms_with_usage.append(llm_provider["name"]) @@ -89,10 +105,52 @@ def validate_and_render_schema(): raise Exception( f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider" ) - if llm_provider.get("name") is None: + + model_name = 
llm_provider.get("model") + if model_name in model_name_keys: raise Exception( - f"llm_provider name is required, please provide name for llm_provider" + f"Duplicate model name {model_name}, please provide unique model name for each llm_provider" ) + model_name_keys.add(model_name) + if llm_provider.get("name") is None: + llm_provider["name"] = model_name + + model_name_tokens = model_name.split("/") + if len(model_name_tokens) < 2: + raise Exception( + f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>." + ) + provider = model_name_tokens[0] + model_id = "/".join(model_name_tokens[1:]) + if provider not in SUPPORTED_PROVIDERS: + if ( + llm_provider.get("base_url", None) is None + or llm_provider.get("provider_interface", None) is None + ): + raise Exception( + f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}" + ) + provider = llm_provider.get("provider_interface", None) + elif llm_provider.get("provider_interface", None) is not None: + raise Exception( + f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' " + ) + + if model_id in model_name_keys: + raise Exception( + f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider" + ) + model_name_keys.add(model_id) + + for routing_preference in llm_provider.get("routing_preferences", []): + if routing_preference.get("name") in model_usage_name_keys: + raise Exception( + f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference" + ) + model_usage_name_keys.add(routing_preference.get("name")) + + llm_provider["model"] = model_id + llm_provider["provider_interface"] = provider llm_provider_name_set.add(llm_provider.get("name")) provider = None if llm_provider.get("provider") and llm_provider.get("provider_interface"): @@ -105,21 +163,14 @@ def validate_and_render_schema(): del llm_provider["provider"] updated_llm_providers.append(llm_provider) - if llm_provider.get("endpoint") and llm_provider.get("base_url"): - raise Exception("Please provide either endpoint or base_url, not both") - - if llm_provider.get("endpoint", None): - endpoint = llm_provider["endpoint"] - protocol = llm_provider.get("protocol", "http") - llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port( - endpoint, protocol - ) - llms_with_endpoint.append(llm_provider) - elif llm_provider.get("base_url", None): + if llm_provider.get("base_url", None): base_url = llm_provider["base_url"] urlparse_result = urlparse(base_url) - if llm_provider.get("port"): - raise Exception("Please provider port in base_url") + url_path = urlparse_result.path + if url_path and url_path != "/": + raise Exception( + f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
+ ) if urlparse_result.scheme == "" or urlparse_result.scheme not in [ "http", "https", @@ -140,7 +191,7 @@ def validate_and_render_schema(): llm_provider["protocol"] = protocol llms_with_endpoint.append(llm_provider) - if len(llms_with_usage) > 0: + if len(model_usage_name_keys) > 0: routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None) if routing_llm_provider and routing_llm_provider not in llm_provider_name_set: raise Exception( @@ -198,6 +249,7 @@ def validate_and_render_schema(): agent_orchestrator = list(endpoints.keys())[0] print("agent_orchestrator: ", agent_orchestrator) + data = { "prompt_gateway_listener": prompt_gateway_listener, "llm_gateway_listener": llm_gateway_listener, @@ -216,6 +268,9 @@ def validate_and_render_schema(): with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file: file.write(rendered) + with open(ARCH_CONFIG_FILE_RENDERED, "w") as file: + file.write(arch_config_string) + def validate_prompt_config(arch_config_file, arch_config_schema_file): with open(arch_config_file, "r") as file: @@ -231,7 +286,7 @@ def validate_prompt_config(arch_config_file, arch_config_schema_file): validate(config_yaml, config_schema_yaml) except Exception as e: print( - f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e.message}" + f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e}" ) raise e diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index e530591c..1f55d718 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -57,6 +57,34 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "exceptiongroup" +version = "1.3.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -179,6 +207,69 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +files = [ + {file = 
"pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pytest" +version = "8.4.1" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -430,6 +521,47 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = 
"tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -444,4 +576,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7d17c2f24f8ad4503a5248c3f7e1a74f458a5ea55a2fb63db382cb6abe6d52df" +content-hash = "83d32fa807f6c7058ecbfc43b777c4d4c637695025cf774ff10532bff8f6712b" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index af89fc31..dda97235 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -20,6 +20,13 @@ pyyaml = "^6.0.2" [tool.poetry.scripts] archgw = "cli.main:main" +[tool.poetry.group.dev.dependencies] +pytest = "^8.4.1" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" + + +[tool.pytest.ini_options] +addopts = ["-v"] diff --git a/arch/tools/test/fastapi_test.py b/arch/tools/test/fastapi_test.py deleted file mode 100644 index bedac8bd..00000000 --- a/arch/tools/test/fastapi_test.py +++ /dev/null @@ -1,45 +0,0 @@ -from fastapi import FastAPI -from pydantic import BaseModel -from typing import List, Dict, Set - -app = FastAPI() - - -class User(BaseModel): - name: str = Field( - "John Doe", description="The name of the user." - ) # Default value and description for name - location: int = None - age: int = Field( - 30, description="The age of the user." - ) # Default value and description for age - tags: Set[str] = Field( - default_factory=set, description="A set of tags associated with the user." - ) # Default empty set and description for tags - metadata: Dict[str, int] = Field( - default_factory=dict, - description="A dictionary storing metadata about the user, with string keys and integer values.", - ) # Default empty dict and description for metadata - - -@app.get("/agent/default") -async def default(request: User): - """ - This endpoint handles information extraction queries. - It can summarize, extract details, and perform various other information-related tasks. - """ - return {"info": f"Query: {request.name}, Count: {request.age}"} - - -@app.post("/agent/action") -async def reboot_network_device(device_id: str, confirmation: str): - """ - This endpoint reboots a network device based on the device ID. - Confirmation is required to proceed with the reboot. - - Args: - device_id: The device_id that you want to reboot. - confirmation: The confirmation that the user wants to reboot. - metadata: Ignore this parameter - """ - return {"status": "Device rebooted", "device_id": device_id} diff --git a/arch/tools/test/fastapi_test_prompt_targets.yml b/arch/tools/test/fastapi_test_prompt_targets.yml deleted file mode 100644 index 7fb9d118..00000000 --- a/arch/tools/test/fastapi_test_prompt_targets.yml +++ /dev/null @@ -1,33 +0,0 @@ -prompt_targets: -- name: default - path: /agent/default - description: "This endpoint handles information extraction queries.\n It can\ - \ summarize, extract details, and perform various other information-related tasks." 
- parameters: - - name: query - type: str - description: Field from Pydantic model DefaultRequest - default_value: null - required: false - - name: count - type: int - description: Field from Pydantic model DefaultRequest - default_value: null - required: false - type: default - auto-llm-dispatch-on-response: true -- name: reboot_network_device - path: /agent/action - description: "This endpoint reboots a network device based on the device ID.\n \ - \ Confirmation is required to proceed with the reboot." - parameters: - - name: device_id - type: str - description: Description for device_id - default_value: '' - required: true - - name: confirmation - type: int - description: Description for confirmation - default_value: '' - required: true diff --git a/arch/tools/test/test_cli.py b/arch/tools/test/test_cli.py deleted file mode 100644 index 0f7cb56c..00000000 --- a/arch/tools/test/test_cli.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest -from click.testing import CliRunner -from tools.cli.main import main # Import your CLI's entry point -import importlib.metadata - - -def get_version(): - """Helper function to fetch the version.""" - try: - version = importlib.metadata.version("archgw") - return version - except importlib.metadata.PackageNotFoundError: - return None - - -@pytest.fixture -def runner(): - """Fixture to create a Click test runner.""" - return CliRunner() - - -def test_version_option(runner): - """Test the --version option.""" - result = runner.invoke(main, ["--version"]) - assert result.exit_code == 0 - expected_version = get_version() - assert f"archgw cli version: {expected_version}" in result.output - - -def test_default_behavior(runner): - """Test the default behavior when no command is provided.""" - result = runner.invoke(main) - assert result.exit_code == 0 - assert "Arch (The Intelligent Prompt Gateway) CLI" in result.output - assert "Usage:" in result.output # Ensure help text is shown - - -def test_invalid_command(runner): - """Test that an invalid command returns an appropriate error message.""" - result = runner.invoke(main, ["invalid_command"]) - assert result.exit_code != 0 # Non-zero exit code for invalid command - assert "Error: No such command 'invalid_command'" in result.output diff --git a/arch/tools/test/test_config_generator.py b/arch/tools/test/test_config_generator.py new file mode 100644 index 00000000..0e4f8446 --- /dev/null +++ b/arch/tools/test/test_config_generator.py @@ -0,0 +1,272 @@ +import pytest +from unittest import mock +import sys +from cli.config_generator import validate_and_render_schema + +# Patch sys.path to allow import from cli/ +import os + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "cli")) +) + + +@pytest.fixture(autouse=True) +def cleanup_env(monkeypatch): + # Clean up environment variables and mocks after each test + yield + monkeypatch.undo() + + +def test_validate_and_render_happy_path(monkeypatch): + monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml") + monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml") + monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml") + monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml") + monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml") + monkeypatch.setenv("TEMPLATE_ROOT", "../") + + arch_config = """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: 
openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + +tracing: + random_sampling: 100 +""" + arch_config_schema = "" + with open("../arch_config_schema.yaml", "r") as file: + arch_config_schema = file.read() + + m_open = mock.mock_open() + # Provide enough file handles for all open() calls in validate_and_render_schema + m_open.side_effect = [ + mock.mock_open(read_data="").return_value, + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write) + mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write) + ] + with mock.patch("builtins.open", m_open): + with mock.patch("config_generator.Environment"): + validate_and_render_schema() + + +arch_config_test_cases = [ + { + "id": "duplicate_provider_name", + "expected_error": "Duplicate llm_provider name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - name: test1 + model: openai/gpt-4o + access_key: $OPENAI_API_KEY + + - name: test1 + model: openai/gpt-4o + access_key: $OPENAI_API_KEY + +""", + }, + { + "id": "provider_interface_with_model_id", + "expected_error": "Please provide provider interface as part of model name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + provider_interface: openai + +""", + }, + { + "id": "duplicate_model_id", + "expected_error": "Duplicate model_id", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + + - model: mistral/gpt-4o + +""", + }, + { + "id": "custom_provider_base_url", + "expected_error": "Must provide base_url and provider_interface", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: custom/gpt-4o + +""", + }, + { + "id": "base_url_no_prefix", + "expected_error": "Please provide base_url without path", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: custom/gpt-4o + base_url: "http://custom.com/test" + provider_interface: openai + +""", + }, + { + "id": "duplicate_routing_preference_name", + "expected_error": "Duplicate routing preference name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o-mini +
access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + +tracing: + random_sampling: 100 + +""", + }, +] + + +@pytest.mark.parametrize( + "arch_config_test_case", + arch_config_test_cases, + ids=[case["id"] for case in arch_config_test_cases], +) +def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case): + monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml") + monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml") + monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml") + monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml") + monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml") + monkeypatch.setenv("TEMPLATE_ROOT", "../") + + arch_config = arch_config_test_case["arch_config"] + expected_error = arch_config_test_case["expected_error"] + test_id = arch_config_test_case["id"] + + arch_config_schema = "" + with open("../arch_config_schema.yaml", "r") as file: + arch_config_schema = file.read() + + m_open = mock.mock_open() + # Provide enough file handles for all open() calls in validate_and_render_schema + m_open.side_effect = [ + mock.mock_open(read_data="").return_value, + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write) + mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write) + ] + with mock.patch("builtins.open", m_open): + with mock.patch("config_generator.Environment"): + with pytest.raises(Exception) as excinfo: + validate_and_render_schema() + assert expected_error in str(excinfo.value) diff --git a/arch/validate_arch_config.sh b/arch/validate_arch_config.sh index a3822e90..493d1b2f 100644 --- a/arch/validate_arch_config.sh +++ b/arch/validate_arch_config.sh @@ -3,11 +3,22 @@ failed_files=() for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do - echo "Validating $file..." - if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then + echo "Validating ${file}..." + touch $(pwd)/${file}_rendered + if ! docker run --rm -v "$(pwd)/${file}:/app/arch_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/arch_config_rendered.yaml:rw" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then echo "Validation failed for $file" failed_files+=("$file") fi + RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/') + if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then + echo "Checking rendered file against checked-in version..." + if ! 
diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then + echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}" + failed_files+=("${file}_rendered") + else + echo "Rendered file matches checked-in version." + fi + fi done # Print summary of failed files diff --git a/crates/.vscode/launch.json b/crates/.vscode/launch.json new file mode 100644 index 00000000..56a29b46 --- /dev/null +++ b/crates/.vscode/launch.json @@ -0,0 +1,21 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug Brightstaff", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/target/debug/brightstaff", + "args": [], + "cwd": "${workspaceFolder}", + "stopOnEntry": false, + "sourceLanguages": ["rust"], + "env": { + "RUST_LOG": "debug", + "RUST_BACKTRACE": "1", + "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml" + }, + "preLaunchTask": "rust: cargo build" + } + ] +} diff --git a/crates/.vscode/tasks.json b/crates/.vscode/tasks.json new file mode 100644 index 00000000..8d648bc7 --- /dev/null +++ b/crates/.vscode/tasks.json @@ -0,0 +1,21 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cargo", + "command": "build", + "args": [ + "--bin", + "brightstaff" + ], + "problemMatcher": [ + "$rustc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "label": "rust: cargo build" + } + ] +} diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs index 55f6d475..89c9ee13 100644 --- a/crates/brightstaff/src/handlers/chat_completions.rs +++ b/crates/brightstaff/src/handlers/chat_completions.rs @@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode}; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tokio_stream::StreamExt; -use tracing::{debug, info, trace, warn}; +use tracing::{debug, info, warn}; use crate::router::llm_router::RouterService; @@ -81,8 +81,8 @@ pub async fn chat_completions( } } - trace!( - "arch-router request body: {}", + debug!( + "arch-router request received: {}", &serde_json::to_string(&chat_completion_request).unwrap() ); @@ -102,9 +102,9 @@ pub async fn chat_completions( .as_ref() .and_then(|s| serde_yaml::from_str(s).ok()); - debug!("usage preferences: {:?}", usage_preferences); + debug!("usage preferences from request: {:?}", usage_preferences); - let mut selected_llm = match router_service + let mut determined_route = match router_service .determine_route( &chat_completion_request.messages, trace_parent.clone(), @@ -121,14 +121,14 @@ pub async fn chat_completions( } }; - if selected_llm.is_none() { + if determined_route.is_none() { debug!("No LLM model selected, using default from request"); - selected_llm = Some(chat_completion_request.model.clone()); + determined_route = Some(chat_completion_request.model.clone()); } info!( "sending request to llm provider: {} with llm model: {:?}", - llm_provider_endpoint, selected_llm + llm_provider_endpoint, determined_route ); if let Some(trace_parent) = trace_parent { @@ -138,10 +138,10 @@ pub async fn chat_completions( ); } - if let Some(selected_llm) = selected_llm { + if let Some(selected_route) = determined_route { request_headers.insert( ARCH_PROVIDER_HINT_HEADER, - header::HeaderValue::from_str(&selected_llm).unwrap(), + header::HeaderValue::from_str(&selected_route).unwrap(), ); } diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 05944a5f..4e4f18b7 100644 --- 
a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -44,9 +44,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { let _tracer_provider = init_tracer(); let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string()); + info!( + "current working directory: {}", + env::current_dir().unwrap().display() + ); // loading arch_config.yaml file - let arch_config_path = - env::var("ARCH_CONFIG_PATH").unwrap_or_else(|_| "./arch_config.yaml".to_string()); + let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED") + .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string()); info!("Loading arch_config.yaml from {}", arch_config_path); let config_contents = diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs index d4173b01..c1320c66 100644 --- a/crates/brightstaff/src/router/llm_router.rs +++ b/crates/brightstaff/src/router/llm_router.rs @@ -1,7 +1,7 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use common::{ - configuration::{LlmProvider, LlmRoute, ModelUsagePreference}, + configuration::{LlmProvider, ModelUsagePreference, RoutingPreference}, consts::ARCH_PROVIDER_HINT_HEADER, }; use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message}; @@ -19,7 +19,6 @@ pub struct RouterService { router_model: Arc, routing_provider_name: String, llm_usage_defined: bool, - llm_provider_map: HashMap<String, LlmProvider>, } #[derive(Debug, Error)] @@ -45,11 +44,14 @@ impl RouterService { ) -> Self { let providers_with_usage = providers .iter() - .filter(|provider| provider.usage.is_some()) + .filter(|provider| provider.routing_preferences.is_some()) .cloned() .collect::<Vec<_>>(); - let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect(); + let llm_routes: Vec<RoutingPreference> = providers_with_usage + .iter() + .flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default()) + .collect(); let router_model = Arc::new(router_model_v1::RouterModelV1::new( llm_routes, @@ -57,18 +59,12 @@ impl RouterService { router_model_v1::MAX_TOKEN_LEN, )); - let llm_provider_map: HashMap<String, LlmProvider> = providers - .into_iter() - .map(|provider| (provider.name.clone(), provider)) - .collect(); - RouterService { router_url, client: reqwest::Client::new(), router_model, routing_provider_name, llm_usage_defined: !providers_with_usage.is_empty(), - llm_provider_map, } } @@ -155,40 +151,21 @@ impl RouterService { if let Some(ContentType::Text(content)) = &chat_completion_response.choices[0].message.content { - let mut selected_model: Option<String> = None; - if let Some(selected_llm_name) = self.router_model.parse_response(content)?
{ - if selected_llm_name != "other" { - if let Some(usage_preferences) = usage_preferences { - for usage in usage_preferences { - if usage.name == selected_llm_name { - selected_model = Some(usage.model); - break; - } - } - if selected_model.is_none() { - warn!( - "Selected LLM model not found in usage preferences: {}", - selected_llm_name - ); - } - } else if let Some(provider) = self.llm_provider_map.get(&selected_llm_name) { - selected_model = provider.model.clone(); - } else { - warn!( - "Selected LLM model not found in provider map: {}", - selected_llm_name - ); - } - } - } + let route_name = self.router_model.parse_response(content)?; info!( "router response: {}, selected_model: {:?}, response time: {}ms", content.replace("\n", "\\n"), - selected_model, + route_name, router_response_time.as_millis() ); - Ok(selected_model) + if let Some(ref route) = route_name { + if route == "other" { + return Ok(None); + } + } + + Ok(route_name) } else { Ok(None) } diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs index e6ccd912..0dcefff6 100644 --- a/crates/brightstaff/src/router/router_model_v1.rs +++ b/crates/brightstaff/src/router/router_model_v1.rs @@ -1,5 +1,5 @@ use common::{ - configuration::{LlmRoute, ModelUsagePreference}, + configuration::{ModelUsagePreference, RoutingPreference}, consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE}, }; use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message}; @@ -36,7 +36,11 @@ pub struct RouterModelV1 { max_token_length: usize, } impl RouterModelV1 { - pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self { + pub fn new( + llm_routes: Vec<RoutingPreference>, + routing_model: String, + max_token_length: usize, + ) -> Self { let llm_route_json_str = serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string()); RouterModelV1 { @@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 { let llm_route_json = usage_preferences .as_ref() .map(|prefs| { - let llm_route: Vec<LlmRoute> = prefs + let llm_route: Vec<RoutingPreference> = prefs .iter() - .map(|pref| LlmRoute { + .map(|pref| RoutingPreference { name: pref.name.clone(), description: pref.usage.clone().unwrap_or_default(), }) @@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router =
RouterModelV1::new(llm_routes, routing_model.clone(), 235); @@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200); @@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230); @@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000); diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index d92f38fb..0693c09b 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -187,24 +187,11 @@ pub struct ModelUsagePreference { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct LlmRoute { +pub struct RoutingPreference { pub name: String, pub description: String, } -impl From<&LlmProvider> for LlmRoute { - fn from(provider: &LlmProvider) -> Self { - Self { - name: provider.name.to_string(), - description: provider - .usage - .as_ref() - .cloned() - .unwrap_or_else(|| "No description available".to_string()), - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { @@ -218,6 +205,7 @@ pub struct LlmProvider { pub port: Option, pub rate_limits: Option, pub usage: Option<String>, + pub routing_preferences: Option<Vec<RoutingPreference>>, } pub trait IntoModels { @@ -256,6 +244,7 @@ impl Default for LlmProvider { port: None, rate_limits: None, usage: None, + routing_preferences: None, } } } @@ -368,7 +357,7 @@ mod test { #[test] fn test_deserialize_configuration() { let ref_config = fs::read_to_string( -
"../../docs/source/resources/includes/arch_config_full_reference.yaml", + "../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml", ) .expect("reference config file not found"); @@ -429,7 +418,7 @@ mod test { #[test] fn test_tool_conversion() { let ref_config = fs::read_to_string( - "../../docs/source/resources/includes/arch_config_full_reference.yaml", + "../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml", ) .expect("reference config file not found"); let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap(); diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index 8214f148..120be691 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -58,7 +58,16 @@ impl TryFrom> for LlmProviders { let name = llm_provider.name.clone(); if llm_providers .providers - .insert(name.clone(), llm_provider) + .insert(name.clone(), llm_provider.clone()) + .is_some() + { + return Err(LlmProvidersNewError::DuplicateName(name)); + } + + // also add model_id as key for provider lookup + if llm_providers + .providers + .insert(llm_provider.model.clone().unwrap(), llm_provider) .is_some() { return Err(LlmProvidersNewError::DuplicateName(name)); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 2fa29496..82b88509 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -113,16 +113,10 @@ impl StreamContext { } debug!( - "request received: llm provider hint: {}, selected llm: {}, model: {}", + "request received: llm provider hint: {}, selected provider: {}", self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) .unwrap_or_default(), - self.llm_provider.as_ref().unwrap().name, - self.llm_provider - .as_ref() - .unwrap() - .model - .as_ref() - .unwrap_or(&String::new()) + self.llm_provider.as_ref().unwrap().name ); } @@ -349,7 +343,7 @@ impl HttpContext for StreamContext { }; info!( - "on_http_request_body: provider: {}, model requested: {}, model selected: {}", + "on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}", self.llm_provider().name, model_requested, model_name.unwrap_or(&"None".to_string()), diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 80c5e5da..108ab1ce 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -30,7 +30,10 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { Some("x-arch-llm-provider-hint"), ) .returning(None) - .expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4")) + .expect_log( + Some(LogLevel::Debug), + Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"), + ) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider"), @@ -263,7 +266,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) 
.expect_send_local_response( Some(StatusCode::BAD_REQUEST.as_u16().into()), None, @@ -429,7 +432,7 @@ fn llm_gateway_override_model_name() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) @@ -478,7 +481,7 @@ fn llm_gateway_override_use_default_model() { // The actual call is not important in this test, we just need to grab the token_id .expect_log( Some(LogLevel::Info), - Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"), + Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"), ) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) @@ -526,7 +529,7 @@ fn llm_gateway_override_use_model_name_none() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: none, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4")) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) diff --git a/demos/samples_java/weather_forcecast_service/.vscode/launch.json b/demos/samples_java/weather_forcecast_service/.vscode/launch.json new file mode 100644 index 00000000..a9232a53 --- /dev/null +++ b/demos/samples_java/weather_forcecast_service/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "java", + "name": "WeatherForecastApplication", + "request": "launch", + "mainClass": "weather.WeatherForecastApplication", + "projectName": "weather-forecast-service" + } + ] +} diff --git a/demos/samples_java/weather_forcecast_service/arch_config.yaml b/demos/samples_java/weather_forcecast_service/arch_config.yaml index d11aaa96..68c3841d 100644 --- a/demos/samples_java/weather_forcecast_service/arch_config.yaml +++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. 
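All of these demo configs are being migrated to the same convention: the separate `name`, `provider_interface`, and `model` keys collapse into a single `model: <provider>/<model_id>` entry. A minimal sketch of the split that `config_generator.py` above performs (the helper name here is illustrative, not part of the codebase):

```python
# Illustrative sketch of the <provider>/<model_id> convention; mirrors the
# split logic in arch/tools/cli/config_generator.py.
SUPPORTED_PROVIDERS = ["arch", "claude", "deepseek", "groq", "mistral", "openai", "gemini"]

def split_model_name(model_name: str) -> tuple[str, str]:
    provider, sep, model_id = model_name.partition("/")
    if not sep or not model_id:
        raise ValueError(f"expected <provider>/<model_id>, got {model_name!r}")
    return provider, model_id

# The model id itself may contain slashes, e.g. Ollama-hosted GGUF models:
assert split_model_name("openai/gpt-4o-mini") == ("openai", "gpt-4o-mini")
assert split_model_name("arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M") == (
    "arch",
    "hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M",
)

# Unknown prefixes (e.g. "custom/...") are accepted only when base_url and
# provider_interface are given explicitly, as in the llm_routing demo config.
```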
diff --git a/demos/samples_python/currency_exchange/arch_config.yaml b/demos/samples_python/currency_exchange/arch_config.yaml index a9c0bce0..1c399449 100644 --- a/demos/samples_python/currency_exchange/arch_config.yaml +++ b/demos/samples_python/currency_exchange/arch_config.yaml @@ -8,10 +8,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o endpoints: frankfurther_api: diff --git a/demos/samples_python/human_resources_agent/arch_config.yaml b/demos/samples_python/human_resources_agent/arch_config.yaml index f46a6c7a..2a96a6d5 100644 --- a/demos/samples_python/human_resources_agent/arch_config.yaml +++ b/demos/samples_python/human_resources_agent/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. diff --git a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml index 706a374b..a29622ec 100644 --- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml +++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml @@ -13,10 +13,8 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true system_prompt: | diff --git a/demos/samples_python/network_switch_operator_agent/arch_config.yaml b/demos/samples_python/network_switch_operator_agent/arch_config.yaml index 0175e1c0..16c8951e 100644 --- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml +++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml @@ -8,10 +8,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/demos/samples_python/stock_quote/arch_config.yaml b/demos/samples_python/stock_quote/arch_config.yaml index 6b4a427f..fcff2c0b 100644 --- a/demos/samples_python/stock_quote/arch_config.yaml +++ b/demos/samples_python/stock_quote/arch_config.yaml @@ -8,10 +8,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o endpoints: twelvedata_api: diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index a51bf6a1..afc0ef04 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -17,15 +17,11 @@ overrides: prompt_target_intent_matching_threshold: 0.6 llm_providers: - - name: groq - access_key: $GROQ_API_KEY - provider_interface: groq - model: llama-3.2-3b-preview + - access_key: $GROQ_API_KEY + model: groq/llama-3.2-3b-preview - - name: gpt-4o - access_key: $OPENAI_API_KEY - 
provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true system_prompt: | diff --git a/demos/use_cases/README.md b/demos/use_cases/README.md index f0684e81..30cce822 100644 --- a/demos/use_cases/README.md +++ b/demos/use_cases/README.md @@ -13,16 +13,12 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider: openai - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest ``` ### Step 2. Start arch gateway diff --git a/demos/use_cases/llm_routing/README.md b/demos/use_cases/llm_routing/README.md index 0d18d0bd..45c7c5a5 100644 --- a/demos/use_cases/llm_routing/README.md +++ b/demos/use_cases/llm_routing/README.md @@ -18,7 +18,7 @@ You can also pass in a header to override model when sending prompt. Following e ```bash $ curl --header 'Content-Type: application/json' \ - --header 'x-arch-llm-provider-hint: ministral-3b' \ + --header 'x-arch-llm-provider-hint: mistral/ministral-3b' \ --data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \ http://localhost:12000/v1/chat/completions 2> /dev/null | jq . { diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 43bdd495..cb3a42e6 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -9,46 +9,34 @@ listeners: llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider_interface: mistral - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest - - name: claude-sonnet - access_key: $ANTHROPIC_API_KEY - provider_interface: claude - model: claude-3-7-sonnet-latest + - access_key: $ANTHROPIC_API_KEY + model: claude/claude-3-7-sonnet-latest - - name: claude-sonnet-4 - access_key: $ANTHROPIC_API_KEY - provider_interface: claude - model: claude-sonnet-4-0 + - access_key: $ANTHROPIC_API_KEY + model: claude/claude-sonnet-4-0 - - name: deepseek - access_key: $DEEPSEEK_API_KEY - provider_interface: deepseek - model: deepseek-reasoner + - access_key: $DEEPSEEK_API_KEY + model: deepseek/deepseek-reasoner - - name: groq - access_key: $GROQ_API_KEY - provider_interface: groq - model: llama-3.1-8b-instant + - access_key: $GROQ_API_KEY + model: groq/llama-3.1-8b-instant - - name: gemini - access_key: $GEMINI_API_KEY - provider_interface: gemini - model: gemini-1.5-pro-latest + - access_key: $GEMINI_API_KEY + model: gemini/gemini-1.5-pro-latest + + - model: custom/test-model + base_url: http://host.docker.internal:11223 + provider_interface: openai tracing: random_sampling: 100 diff --git a/demos/use_cases/ollama/arch_config.yaml b/demos/use_cases/ollama/arch_config.yaml index 24eb1bf1..5f88b0a3 100644 --- a/demos/use_cases/ollama/arch_config.yaml +++ b/demos/use_cases/ollama/arch_config.yaml @@ -9,10 +9,9 @@ listeners: llm_providers: - - name: local-llama + - model: my_llm_provider/llama3.2 provider_interface: openai - model: llama3.2 - endpoint: 
host.docker.internal:11434 + base_url: http://host.docker.internal:11434 default: true system_prompt: | diff --git a/demos/use_cases/orchestrating_agents/arch_config.yaml b/demos/use_cases/orchestrating_agents/arch_config.yaml index 639c5242..b52ceb22 100644 --- a/demos/use_cases/orchestrating_agents/arch_config.yaml +++ b/demos/use_cases/orchestrating_agents/arch_config.yaml @@ -22,10 +22,8 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true system_prompt: | diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index f8521811..33136325 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -9,28 +9,21 @@ listeners: llm_providers: - - name: gpt-4o-mini - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - - - name: gpt-4.1 - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 default: true - - name: code_generation + - model: openai/gpt-4o access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - model: gpt-4o-mini - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements tracing: random_sampling: 100 diff --git a/demos/use_cases/preference_based_routing/arch_config_local.yaml b/demos/use_cases/preference_based_routing/arch_config_local.yaml index 029918d0..b965cd0c 100644 --- a/demos/use_cases/preference_based_routing/arch_config_local.yaml +++ b/demos/use_cases/preference_based_routing/arch_config_local.yaml @@ -14,32 +14,24 @@ listeners: llm_providers: - name: arch-router - provider_interface: arch - model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M - endpoint: host.docker.internal:11434 + model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + base_url: http://host.docker.internal:11434 - - name: gpt-4o-mini - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - - - name: gpt-4.1 - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 default: true - - name: code_generation + - model: openai/gpt-4o access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - model: gpt-4.1 - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or 
requirements tracing: random_sampling: 100 diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl index 432f0996..d9b243e7 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl @@ -2,13 +2,13 @@ POST http://localhost:12000/v1/chat/completions Content-Type: application/json { + "model": "openai/gpt-4.1", "messages": [ { "role": "user", "content": "hi" } - ], - "model": "none" + ] } HTTP 200 [Asserts] diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl index 8b92bcc7..16c68c72 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl @@ -14,4 +14,4 @@ Content-Type: application/json HTTP 200 [Asserts] header "content-type" matches /text\/event-stream/ -body matches /^data: .*?gpt-4.1.*?\n/ +body matches /^data: .*?gpt-4o-mini.*?\n/ diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml index 99a67401..b848d718 100644 --- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml +++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml @@ -85,10 +85,8 @@ system_prompt: | Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks! llm_providers: - - name: openai - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true prompt_targets: diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index 4523ae32..69b24f41 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 9c3499ea..34dbf64c 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -50,10 +50,8 @@ Create ``arch_config.yaml`` file with the following content: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. @@ -153,16 +151,12 @@ Create ``arch_config.yaml`` file with the following content: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider_interface: openai - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest Step 2.
Start arch gateway ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 4ee46cbb..89501f88 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 0cde6ff8..f999860c 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -74,9 +74,6 @@ Below is an example to show how to set up a prompt target for the Arch Router: :caption: Route Config Example - routing: - model: archgw-v1-router-model - listeners: egress_traffic: address: 0.0.0.0 @@ -85,29 +82,22 @@ Below is an example to show how to set up a prompt target for the Arch Router: timeout: 30s llm_providers: - - name: archgw-v1-router-model - provider_interface: openai - model: katanemo/Arch-Router-1.5B - base_url: ... - - name: gpt-4o-mini - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini - default: true + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true - - name: code_generation - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o - usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements - - - name: code_understanding - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 - usage: understand and explain existing code snippets, functions, or libraries + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements Example Use Cases ------------------------- diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index 5ef2639c..808baff1 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -30,21 +30,16 @@ endpoints: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai + - name: openai/gpt-4o access_key: $OPENAI_API_KEY - model: gpt-4o + model: openai/gpt-4o default: true - - name: Mistral8x7b - provider_interface: openai - access_key: $MISTRAL_API_KEY - model: mistral-8x7b + - access_key: $MISTRAL_API_KEY + model: mistral/mistral-8x7b - - name: MistralLocal7b - provider_interface: openai - model: mistral-7b-instruct - endpoint: mistral_local + - model: mistral/mistral-7b-instruct + base_url: http://mistral_local # provides a way to override default settings for the arch system overrides: diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml new file mode 100644 index 
00000000..503f6a80 --- /dev/null +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -0,0 +1,81 @@ +endpoints: + app_server: + connect_timeout: 0.005s + endpoint: 127.0.0.1 + port: 80 + error_target: + endpoint: error_target_1 + port: 80 + mistral_local: + endpoint: 127.0.0.1 + port: 8001 +listeners: + egress_traffic: + address: 0.0.0.0 + message_format: openai + port: 12000 + timeout: 5s + ingress_traffic: + address: 0.0.0.0 + message_format: openai + port: 10000 + timeout: 5s +llm_providers: +- access_key: $OPENAI_API_KEY + default: true + model: gpt-4o + name: openai/gpt-4o + provider_interface: openai +- access_key: $MISTRAL_API_KEY + model: mistral-8x7b + name: mistral/mistral-8x7b + provider_interface: mistral +- base_url: http://mistral_local + endpoint: mistral_local + model: mistral-7b-instruct + name: mistral/mistral-7b-instruct + port: 80 + protocol: http + provider_interface: mistral +overrides: + prompt_target_intent_matching_threshold: 0.6 +prompt_guards: + input_guards: + jailbreak: + on_exception: + message: Looks like you're curious about my abilities, but I can only provide + assistance within my programmed parameters. +prompt_targets: +- auto_llm_dispatch_on_response: true + default: true + description: handle all scenarios that are question and answer in nature. Like summarization, + information extraction, etc. + endpoint: + http_method: POST + name: app_server + path: /agent/summary + name: information_extraction + system_prompt: You are a helpful information extraction assistant. Use the information + that is provided to you. +- description: Reboot a specific network device + endpoint: + name: app_server + path: /agent/action + name: reboot_network_device + parameters: + - description: Identifier of the network device to reboot. + name: device_id + required: true + type: str + - default: false + description: Confirmation flag to proceed with reboot. + enum: + - true + - false + name: confirmation + type: bool +system_prompt: You are a network assistant that just offers facts; not advice on manufacturers + or purchasing decisions. +tracing: + sampling_rate: 0.1 +version: v0.1 diff --git a/model_server/.vscode/launch.json b/model_server/.vscode/launch.json index ca83be87..19ed7342 100644 --- a/model_server/.vscode/launch.json +++ b/model_server/.vscode/launch.json @@ -4,6 +4,7 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { "name": "model server", "type": "debugpy", diff --git a/model_server/.vscode/settings.json b/model_server/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/model_server/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/archgw/.vscode/launch.json b/tests/archgw/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/archgw/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/archgw/.vscode/settings.json b/tests/archgw/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/archgw/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index c702887b..70a95a3e 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -13,21 +13,15 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true - - name: gpt-3.5-turbo-0125 - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-3.5-turbo-0125 + - access_key: $OPENAI_API_KEY + model: openai/gpt-3.5-turbo-0125 - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. diff --git a/tests/e2e/.vscode/launch.json b/tests/e2e/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/e2e/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/e2e/.vscode/settings.json b/tests/e2e/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/e2e/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/modelserver/.vscode/launch.json b/tests/modelserver/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/modelserver/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/modelserver/.vscode/settings.json b/tests/modelserver/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/modelserver/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +}
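One pattern in the hunks above is worth calling out: self-hosted, OpenAI-compatible endpoints (the ollama and llm_routing configs) are the one place where explicit fields survive the migration, because their provider prefix is a free-form label rather than a built-in provider name. A sketch combining the two styles shown in this diff (host and port values are the demo values, not recommendations):

```yaml
llm_providers:
  # Hosted provider: the prefix selects a built-in interface; no base_url needed.
  - access_key: $OPENAI_API_KEY
    model: openai/gpt-4o-mini
    default: true

  # Self-hosted model: the prefix is a free-form label, so the API dialect
  # and endpoint must be spelled out explicitly.
  - model: my_llm_provider/llama3.2
    provider_interface: openai
    base_url: http://host.docker.internal:11434
```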