mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
better model names (#517)
This commit is contained in:
parent
4e2355965b
commit
a7fddf30f9
55 changed files with 979 additions and 483 deletions
40
.github/workflows/arch_tools_tests.yml
vendored
Normal file
40
.github/workflows/arch_tools_tests.yml
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# CI workflow: installs the arch tools package with Poetry and runs its pytest suite.
name: arch tools tests

permissions:
  contents: read

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  arch_tools_tests:
    runs-on: ubuntu-latest-m
    defaults:
      run:
        # Every `run` step below executes from the tools package directory.
        working-directory: ./arch/tools

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.12"

      - name: install poetry
        run: |
          export POETRY_VERSION=1.8.5
          curl -sSL https://install.python-poetry.org | python3 -
          # An `export PATH=...` only lasts for this step's shell; appending to
          # GITHUB_PATH makes the poetry binary visible to the following steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: install arch tools
        run: |
          poetry install

      - name: run tests
        run: |
          poetry run pytest
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -102,13 +102,10 @@ venv.bak/
|
|||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
# VSCode stuff:
|
||||
.vscode/
|
||||
|
||||
# MacOS Metadata
|
||||
*.DS_Store
|
||||
|
||||
|
||||
*.yaml_rendered
|
||||
|
||||
# =========================================
|
||||
|
||||
|
|
|
|||
35
README.md
35
README.md
|
|
@ -104,10 +104,8 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
|
@ -204,16 +202,12 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
- name: mistral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: openai
|
||||
model: mistral-3b-latest
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral/mistral-3b-latest
|
||||
```
|
||||
|
||||
#### Preference-based Routing
|
||||
|
|
@ -230,17 +224,18 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: code_generation
|
||||
- model: openai/gpt-4.1
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4.1
|
||||
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
default: true
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
- name: code_understanding
|
||||
provider_interface: openai
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
usage: understand and explain existing code snippets, functions, or libraries
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
```
|
||||
|
||||
Arch uses a lightweight 1.5B autoregressive model to map prompts (and conversation context) to these policies. This approach adapts to intent drift, supports multi-turn conversations, and avoids the brittleness of embedding-based classifiers or manual if/else chains. No retraining is required when adding new models or updating policies — routing is governed entirely by human-readable rules. You can learn more about the design, benchmarks, and methodology behind preference-based routing in our paper:
|
||||
|
|
|
|||
|
|
@ -66,11 +66,16 @@ properties:
|
|||
properties:
|
||||
name:
|
||||
type: string
|
||||
# provider field is deprecated, use provider_interface instead
|
||||
provider:
|
||||
access_key:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
default:
|
||||
type: boolean
|
||||
base_url:
|
||||
type: string
|
||||
http_host:
|
||||
type: string
|
||||
enum:
|
||||
- openai
|
||||
provider_interface:
|
||||
type: string
|
||||
enum:
|
||||
|
|
@ -81,29 +86,22 @@ properties:
|
|||
- mistral
|
||||
- openai
|
||||
- gemini
|
||||
access_key:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
default:
|
||||
type: boolean
|
||||
# endpoint field is deprecated, use base_url instead
|
||||
endpoint:
|
||||
type: string
|
||||
base_url:
|
||||
type: string
|
||||
protocol:
|
||||
type: string
|
||||
enum:
|
||||
- http
|
||||
- https
|
||||
http_host:
|
||||
type: string
|
||||
usage:
|
||||
type: string
|
||||
routing_preferences:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- description
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- model
|
||||
overrides:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -5,16 +5,16 @@ import yaml
|
|||
from jsonschema import validate
|
||||
from urllib.parse import urlparse
|
||||
|
||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
||||
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
||||
)
|
||||
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
|
||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
|
||||
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
|
||||
)
|
||||
ARCH_CONFIG_SCHEMA_FILE = os.getenv(
|
||||
"ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
|
||||
)
|
||||
|
||||
SUPPORTED_PROVIDERS = [
|
||||
"arch",
|
||||
"claude",
|
||||
"deepseek",
|
||||
"groq",
|
||||
"mistral",
|
||||
"openai",
|
||||
"gemini",
|
||||
]
|
||||
|
||||
|
||||
def get_endpoint_and_port(endpoint, protocol):
|
||||
|
|
@ -32,8 +32,22 @@ def get_endpoint_and_port(endpoint, protocol):
|
|||
|
||||
|
||||
def validate_and_render_schema():
|
||||
env = Environment(loader=FileSystemLoader("./"))
|
||||
template = env.get_template("envoy.template.yaml")
|
||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
||||
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
||||
)
|
||||
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
|
||||
ARCH_CONFIG_FILE_RENDERED = os.getenv(
|
||||
"ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml"
|
||||
)
|
||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
|
||||
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
|
||||
)
|
||||
ARCH_CONFIG_SCHEMA_FILE = os.getenv(
|
||||
"ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
|
||||
)
|
||||
|
||||
env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./")))
|
||||
template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)
|
||||
|
||||
try:
|
||||
validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE)
|
||||
|
|
@ -82,6 +96,8 @@ def validate_and_render_schema():
|
|||
updated_llm_providers = []
|
||||
llm_provider_name_set = set()
|
||||
llms_with_usage = []
|
||||
model_name_keys = set()
|
||||
model_usage_name_keys = set()
|
||||
for llm_provider in config_yaml["llm_providers"]:
|
||||
if llm_provider.get("usage", None):
|
||||
llms_with_usage.append(llm_provider["name"])
|
||||
|
|
@ -89,10 +105,52 @@ def validate_and_render_schema():
|
|||
raise Exception(
|
||||
f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
|
||||
)
|
||||
if llm_provider.get("name") is None:
|
||||
|
||||
model_name = llm_provider.get("model")
|
||||
if model_name in model_name_keys:
|
||||
raise Exception(
|
||||
f"llm_provider name is required, please provide name for llm_provider"
|
||||
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
|
||||
)
|
||||
model_name_keys.add(model_name)
|
||||
if llm_provider.get("name") is None:
|
||||
llm_provider["name"] = model_name
|
||||
|
||||
model_name_tokens = model_name.split("/")
|
||||
if len(model_name_tokens) < 2:
|
||||
raise Exception(
|
||||
f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
|
||||
)
|
||||
provider = model_name_tokens[0]
|
||||
model_id = "/".join(model_name_tokens[1:])
|
||||
if provider not in SUPPORTED_PROVIDERS:
|
||||
if (
|
||||
llm_provider.get("base_url", None) is None
|
||||
or llm_provider.get("provider_interface", None) is None
|
||||
):
|
||||
raise Exception(
|
||||
f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
|
||||
)
|
||||
provider = llm_provider.get("provider_interface", None)
|
||||
elif llm_provider.get("provider_interface", None) is not None:
|
||||
raise Exception(
|
||||
f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' "
|
||||
)
|
||||
|
||||
if model_id in model_name_keys:
|
||||
raise Exception(
|
||||
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
|
||||
)
|
||||
model_name_keys.add(model_id)
|
||||
|
||||
for routing_preference in llm_provider.get("routing_preferences", []):
|
||||
if routing_preference.get("name") in model_usage_name_keys:
|
||||
raise Exception(
|
||||
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
|
||||
)
|
||||
model_usage_name_keys.add(routing_preference.get("name"))
|
||||
|
||||
llm_provider["model"] = model_id
|
||||
llm_provider["provider_interface"] = provider
|
||||
llm_provider_name_set.add(llm_provider.get("name"))
|
||||
provider = None
|
||||
if llm_provider.get("provider") and llm_provider.get("provider_interface"):
|
||||
|
|
@ -105,21 +163,14 @@ def validate_and_render_schema():
|
|||
del llm_provider["provider"]
|
||||
updated_llm_providers.append(llm_provider)
|
||||
|
||||
if llm_provider.get("endpoint") and llm_provider.get("base_url"):
|
||||
raise Exception("Please provide either endpoint or base_url, not both")
|
||||
|
||||
if llm_provider.get("endpoint", None):
|
||||
endpoint = llm_provider["endpoint"]
|
||||
protocol = llm_provider.get("protocol", "http")
|
||||
llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port(
|
||||
endpoint, protocol
|
||||
)
|
||||
llms_with_endpoint.append(llm_provider)
|
||||
elif llm_provider.get("base_url", None):
|
||||
if llm_provider.get("base_url", None):
|
||||
base_url = llm_provider["base_url"]
|
||||
urlparse_result = urlparse(base_url)
|
||||
if llm_provider.get("port"):
|
||||
raise Exception("Please provider port in base_url")
|
||||
url_path = urlparse_result.path
|
||||
if url_path and url_path != "/":
|
||||
raise Exception(
|
||||
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
|
||||
)
|
||||
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
|
||||
"http",
|
||||
"https",
|
||||
|
|
@ -140,7 +191,7 @@ def validate_and_render_schema():
|
|||
llm_provider["protocol"] = protocol
|
||||
llms_with_endpoint.append(llm_provider)
|
||||
|
||||
if len(llms_with_usage) > 0:
|
||||
if len(model_usage_name_keys) > 0:
|
||||
routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None)
|
||||
if routing_llm_provider and routing_llm_provider not in llm_provider_name_set:
|
||||
raise Exception(
|
||||
|
|
@ -198,6 +249,7 @@ def validate_and_render_schema():
|
|||
agent_orchestrator = list(endpoints.keys())[0]
|
||||
|
||||
print("agent_orchestrator: ", agent_orchestrator)
|
||||
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway_listener,
|
||||
"llm_gateway_listener": llm_gateway_listener,
|
||||
|
|
@ -216,6 +268,9 @@ def validate_and_render_schema():
|
|||
with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
|
||||
file.write(rendered)
|
||||
|
||||
with open(ARCH_CONFIG_FILE_RENDERED, "w") as file:
|
||||
file.write(arch_config_string)
|
||||
|
||||
|
||||
def validate_prompt_config(arch_config_file, arch_config_schema_file):
|
||||
with open(arch_config_file, "r") as file:
|
||||
|
|
@ -231,7 +286,7 @@ def validate_prompt_config(arch_config_file, arch_config_schema_file):
|
|||
validate(config_yaml, config_schema_yaml)
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e.message}"
|
||||
f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e}"
|
||||
)
|
||||
raise e
|
||||
|
||||
|
|
|
|||
134
arch/tools/poetry.lock
generated
134
arch/tools/poetry.lock
generated
|
|
@ -57,6 +57,34 @@ files = [
|
|||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.3.0"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"},
|
||||
{file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""}
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.1.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
|
||||
{file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jinja2"
|
||||
version = "3.1.6"
|
||||
|
|
@ -179,6 +207,69 @@ files = [
|
|||
{file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "25.0"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"},
|
||||
{file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.6.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
|
||||
{file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["coverage", "pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.2"
|
||||
description = "Pygments is a syntax highlighting package written in Python."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
|
||||
{file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
windows-terminal = ["colorama (>=0.4.6)"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "8.4.1"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"},
|
||||
{file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
|
||||
exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""}
|
||||
iniconfig = ">=1"
|
||||
packaging = ">=20"
|
||||
pluggy = ">=1.5,<2"
|
||||
pygments = ">=2.7.2"
|
||||
tomli = {version = ">=1", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.2"
|
||||
|
|
@ -430,6 +521,47 @@ enabler = ["pytest-enabler (>=2.2)"]
|
|||
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
|
||||
type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.2.1"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
|
||||
{file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
|
||||
{file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.14.1"
|
||||
|
|
@ -444,4 +576,4 @@ files = [
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "7d17c2f24f8ad4503a5248c3f7e1a74f458a5ea55a2fb63db382cb6abe6d52df"
|
||||
content-hash = "83d32fa807f6c7058ecbfc43b777c4d4c637695025cf774ff10532bff8f6712b"
|
||||
|
|
|
|||
|
|
@ -20,6 +20,13 @@ pyyaml = "^6.0.2"
|
|||
[tool.poetry.scripts]
|
||||
archgw = "cli.main:main"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^8.4.1"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = ["-v"]
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
from typing import Dict, List, Optional, Set

from fastapi import FastAPI
from pydantic import BaseModel, Field
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class User(BaseModel):
    """Request model describing a user; every field has a default value."""

    # Display name of the user.
    name: str = Field("John Doe", description="The name of the user.")
    # Defaults to None, so the annotation must admit None as well as int.
    location: Optional[int] = None
    # Age of the user.
    age: int = Field(30, description="The age of the user.")
    # default_factory gives each instance its own empty set.
    tags: Set[str] = Field(
        default_factory=set, description="A set of tags associated with the user."
    )
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, int] = Field(
        default_factory=dict,
        description="A dictionary storing metadata about the user, with string keys and integer values.",
    )
|
||||
|
||||
|
||||
# GET /agent/default: returns an "info" string built from the name and age of
# the parsed User model.
# NOTE(review): the parameter is a Pydantic model, which FastAPI normally reads
# from the request body -- confirm that a body on a GET route is intended.
@app.get("/agent/default")
async def default(request: User):
    """
    This endpoint handles information extraction queries.
    It can summarize, extract details, and perform various other information-related tasks.
    """
    # The "Query" and "Count" labels are filled from User.name and User.age.
    return {"info": f"Query: {request.name}, Count: {request.age}"}
|
||||
|
||||
|
||||
# POST /agent/action: returns a fixed "Device rebooted" status together with
# the device_id that was passed in.
# NOTE(review): `confirmation` is accepted but never inspected, so the response
# is the same whatever value is supplied -- confirm whether a check is missing.
# NOTE(review): the docstring lists a `metadata` argument that is not part of
# the signature -- confirm whether it should be added or the entry dropped.
@app.post("/agent/action")
async def reboot_network_device(device_id: str, confirmation: str):
    """
    This endpoint reboots a network device based on the device ID.
    Confirmation is required to proceed with the reboot.

    Args:
        device_id: The device_id that you want to reboot.
        confirmation: The confirmation that the user wants to reboot.
        metadata: Ignore this parameter
    """
    return {"status": "Device rebooted", "device_id": device_id}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
prompt_targets:
|
||||
- name: default
|
||||
path: /agent/default
|
||||
description: "This endpoint handles information extraction queries.\n It can\
|
||||
\ summarize, extract details, and perform various other information-related tasks."
|
||||
parameters:
|
||||
- name: query
|
||||
type: str
|
||||
description: Field from Pydantic model DefaultRequest
|
||||
default_value: null
|
||||
required: false
|
||||
- name: count
|
||||
type: int
|
||||
description: Field from Pydantic model DefaultRequest
|
||||
default_value: null
|
||||
required: false
|
||||
type: default
|
||||
auto-llm-dispatch-on-response: true
|
||||
- name: reboot_network_device
|
||||
path: /agent/action
|
||||
description: "This endpoint reboots a network device based on the device ID.\n \
|
||||
\ Confirmation is required to proceed with the reboot."
|
||||
parameters:
|
||||
- name: device_id
|
||||
type: str
|
||||
description: Description for device_id
|
||||
default_value: ''
|
||||
required: true
|
||||
- name: confirmation
|
||||
type: int
|
||||
description: Description for confirmation
|
||||
default_value: ''
|
||||
required: true
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
import pytest
|
||||
from click.testing import CliRunner
|
||||
from tools.cli.main import main # Import your CLI's entry point
|
||||
import importlib.metadata
|
||||
|
||||
|
||||
def get_version(package="archgw"):
    """Return the installed version of *package*, or ``None`` if it is absent.

    Args:
        package: Distribution name to look up. Defaults to ``"archgw"`` so
            existing zero-argument callers behave exactly as before.

    Returns:
        The version string reported by ``importlib.metadata``, or ``None``
        when no distribution with that name is installed.
    """
    try:
        return importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        # A missing distribution is reported as None rather than raised.
        return None
|
||||
|
||||
|
||||
@pytest.fixture
def runner():
    """Provide a Click ``CliRunner`` instance to the requesting test."""
    cli_runner = CliRunner()
    return cli_runner
|
||||
|
||||
|
||||
def test_version_option(runner):
    """``--version`` exits with code 0 and prints the version banner."""
    outcome = runner.invoke(main, ["--version"])
    assert outcome.exit_code == 0
    # The banner embeds whatever get_version() reports for the package.
    banner = f"archgw cli version: {get_version()}"
    assert banner in outcome.output
|
||||
|
||||
|
||||
def test_default_behavior(runner):
    """Invoking the CLI with no arguments succeeds and prints the help text."""
    outcome = runner.invoke(main)
    assert outcome.exit_code == 0
    printed = outcome.output
    # The second fragment ensures the help text is shown.
    for fragment in ("Arch (The Intelligent Prompt Gateway) CLI", "Usage:"):
        assert fragment in printed
|
||||
|
||||
|
||||
def test_invalid_command(runner):
    """An unknown sub-command fails and reports "No such command"."""
    outcome = runner.invoke(main, ["invalid_command"])
    exited_with_error = outcome.exit_code != 0
    assert exited_with_error  # Non-zero exit code for invalid command
    assert "Error: No such command 'invalid_command'" in outcome.output
|
||||
272
arch/tools/test/test_config_generator.py
Normal file
272
arch/tools/test/test_config_generator.py
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
import pytest
|
||||
from unittest import mock
|
||||
import sys
|
||||
from cli.config_generator import validate_and_render_schema
|
||||
|
||||
# Patch sys.path to allow import from cli/
|
||||
import os
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "cli"))
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def cleanup_env(monkeypatch):
    """Autouse fixture that rolls back monkeypatch changes once a test finishes."""
    # Clean up environment variables and mocks after each test
    yield
    # Code after the yield runs as fixture teardown.
    # NOTE(review): pytest's monkeypatch fixture presumably already undoes its
    # changes at teardown, which would make this explicit call redundant -- confirm.
    monkeypatch.undo()
|
||||
|
||||
|
||||
def test_validate_and_render_happy_path(monkeypatch):
    """Run validate_and_render_schema on a valid config with file I/O mocked.

    The test has no explicit assertions: it passes when the call completes
    without raising.
    """
    env_settings = {
        "ARCH_CONFIG_FILE": "fake_arch_config.yaml",
        "ARCH_CONFIG_SCHEMA_FILE": "fake_arch_config_schema.yaml",
        "ENVOY_CONFIG_TEMPLATE_FILE": "./envoy.template.yaml",
        "ARCH_CONFIG_FILE_RENDERED": "fake_arch_config_rendered.yaml",
        "ENVOY_CONFIG_FILE_RENDERED": "fake_envoy.yaml",
        "TEMPLATE_ROOT": "../",
    }
    for env_name, env_value in env_settings.items():
        monkeypatch.setenv(env_name, env_value)

    config_text = """
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code understanding
        description: understand and explain existing code snippets, functions, or libraries

  - model: openai/gpt-4.1
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements

tracing:
  random_sampling: 100
"""
    # Read the real schema from disk before open() gets patched below.
    with open("../arch_config_schema.yaml", "r") as schema_file:
        schema_text = schema_file.read()

    # One fake handle per open() call, consumed in call order.
    fake_handles = [
        # NOTE(review): which file the first open() targets is not visible
        # from this test; it is served empty content.
        mock.mock_open(read_data="").return_value,
        mock.mock_open(read_data=config_text).return_value,  # ARCH_CONFIG_FILE
        mock.mock_open(read_data=schema_text).return_value,  # ARCH_CONFIG_SCHEMA_FILE
        mock.mock_open(read_data=config_text).return_value,  # ARCH_CONFIG_FILE
        mock.mock_open(read_data=schema_text).return_value,  # ARCH_CONFIG_SCHEMA_FILE
        mock.mock_open().return_value,  # ENVOY_CONFIG_FILE_RENDERED (write)
        mock.mock_open().return_value,  # ARCH_CONFIG_FILE_RENDERED (write)
    ]
    fake_open = mock.mock_open()
    fake_open.side_effect = fake_handles

    with mock.patch("builtins.open", fake_open), mock.patch(
        "config_generator.Environment"
    ):
        validate_and_render_schema()
|
||||
|
||||
|
||||
# Negative cases for validate_and_render_schema. Each entry carries:
#   id             - label used as the pytest parametrize id
#   expected_error - substring that must appear in the raised error text
#   arch_config    - the arch config YAML fed to the generator
arch_config_test_cases = [
    dict(
        id="duplicate_provider_name",
        expected_error="Duplicate llm_provider name",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - name: test1
    model: openai/gpt-4o
    access_key: $OPENAI_API_KEY

  - name: test1
    model: openai/gpt-4o
    access_key: $OPENAI_API_KEY

""",
    ),
    dict(
        id="provider_interface_with_model_id",
        expected_error="Please provide provider interface as part of model name",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    provider_interface: openai

""",
    ),
    dict(
        id="duplicate_model_id",
        expected_error="Duplicate model_id",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY

  - model: mistral/gpt-4o

""",
    ),
    dict(
        id="custom_provider_base_url",
        expected_error="Must provide base_url and provider_interface",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: custom/gpt-4o

""",
    ),
    dict(
        id="base_url_no_prefix",
        expected_error="Please provide base_url without path",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: custom/gpt-4o
    base_url: "http://custom.com/test"
    provider_interface: openai

""",
    ),
    dict(
        id="duplicate_routeing_preference_name",
        expected_error="Duplicate routing preference name",
        arch_config="""
version: v0.1.0

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code understanding
        description: understand and explain existing code snippets, functions, or libraries

  - model: openai/gpt-4.1
    access_key: $OPENAI_API_KEY
    routing_preferences:
      - name: code understanding
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements

tracing:
  random_sampling: 100

""",
    ),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arch_config_test_case",
    arch_config_test_cases,
    ids=[case["id"] for case in arch_config_test_cases],
)
def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case):
    """Check that each invalid config makes validate_and_render_schema raise.

    Args:
        monkeypatch: pytest fixture used to set the environment variables
            configured for the run.
        arch_config_test_case: one entry of ``arch_config_test_cases``; its
            ``arch_config`` is served as the config file content and its
            ``expected_error`` must be a substring of the raised error text.
    """
    monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml")
    monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml")
    monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml")
    monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml")
    monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml")
    monkeypatch.setenv("TEMPLATE_ROOT", "../")

    arch_config = arch_config_test_case["arch_config"]
    expected_error = arch_config_test_case["expected_error"]

    # Read the real schema from disk before open() gets patched below.
    with open("../arch_config_schema.yaml", "r") as file:
        arch_config_schema = file.read()

    m_open = mock.mock_open()
    # Provide enough file handles for all open() calls in validate_and_render_schema
    m_open.side_effect = [
        mock.mock_open(read_data="").return_value,
        mock.mock_open(read_data=arch_config).return_value,  # ARCH_CONFIG_FILE
        mock.mock_open(
            read_data=arch_config_schema
        ).return_value,  # ARCH_CONFIG_SCHEMA_FILE
        mock.mock_open(read_data=arch_config).return_value,  # ARCH_CONFIG_FILE
        mock.mock_open(
            read_data=arch_config_schema
        ).return_value,  # ARCH_CONFIG_SCHEMA_FILE
        mock.mock_open().return_value,  # ENVOY_CONFIG_FILE_RENDERED (write)
        mock.mock_open().return_value,  # ARCH_CONFIG_FILE_RENDERED (write)
    ]
    with mock.patch("builtins.open", m_open):
        with mock.patch("config_generator.Environment"):
            with pytest.raises(Exception) as excinfo:
                validate_and_render_schema()

    # Checked outside the pytest.raises block: statements placed after the
    # raising call inside that block would never execute.
    assert expected_error in str(excinfo.value)
|
||||
|
|
@ -3,11 +3,22 @@
|
|||
failed_files=()
|
||||
|
||||
for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do
|
||||
echo "Validating $file..."
|
||||
if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
|
||||
echo "Validating ${file}..."
|
||||
touch $(pwd)/${file}_rendered
|
||||
if ! docker run --rm -v "$(pwd)/${file}:/app/arch_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/arch_config_rendered.yaml:rw" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
|
||||
echo "Validation failed for $file"
|
||||
failed_files+=("$file")
|
||||
fi
|
||||
RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/')
|
||||
if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then
|
||||
echo "Checking rendered file against checked-in version..."
|
||||
if ! diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then
|
||||
echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}"
|
||||
failed_files+=("${file}_rendered")
|
||||
else
|
||||
echo "Rendered file matches checked-in version."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Print summary of failed files
|
||||
|
|
|
|||
21
crates/.vscode/launch.json
vendored
Normal file
21
crates/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Debug Brightstaff",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/target/debug/brightstaff",
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"stopOnEntry": false,
|
||||
"sourceLanguages": ["rust"],
|
||||
"env": {
|
||||
"RUST_LOG": "debug",
|
||||
"RUST_BACKTRACE": "1",
|
||||
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
|
||||
},
|
||||
"preLaunchTask": "rust: cargo build"
|
||||
}
|
||||
]
|
||||
}
|
||||
21
crates/.vscode/tasks.json
vendored
Normal file
21
crates/.vscode/tasks.json
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "cargo",
|
||||
"command": "build",
|
||||
"args": [
|
||||
"--bin",
|
||||
"brightstaff"
|
||||
],
|
||||
"problemMatcher": [
|
||||
"$rustc"
|
||||
],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"label": "rust: cargo build"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode};
|
|||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::{debug, info, trace, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::router::llm_router::RouterService;
|
||||
|
||||
|
|
@ -81,8 +81,8 @@ pub async fn chat_completions(
|
|||
}
|
||||
}
|
||||
|
||||
trace!(
|
||||
"arch-router request body: {}",
|
||||
debug!(
|
||||
"arch-router request received: {}",
|
||||
&serde_json::to_string(&chat_completion_request).unwrap()
|
||||
);
|
||||
|
||||
|
|
@ -102,9 +102,9 @@ pub async fn chat_completions(
|
|||
.as_ref()
|
||||
.and_then(|s| serde_yaml::from_str(s).ok());
|
||||
|
||||
debug!("usage preferences: {:?}", usage_preferences);
|
||||
debug!("usage preferences from request: {:?}", usage_preferences);
|
||||
|
||||
let mut selected_llm = match router_service
|
||||
let mut determined_route = match router_service
|
||||
.determine_route(
|
||||
&chat_completion_request.messages,
|
||||
trace_parent.clone(),
|
||||
|
|
@ -121,14 +121,14 @@ pub async fn chat_completions(
|
|||
}
|
||||
};
|
||||
|
||||
if selected_llm.is_none() {
|
||||
if determined_route.is_none() {
|
||||
debug!("No LLM model selected, using default from request");
|
||||
selected_llm = Some(chat_completion_request.model.clone());
|
||||
determined_route = Some(chat_completion_request.model.clone());
|
||||
}
|
||||
|
||||
info!(
|
||||
"sending request to llm provider: {} with llm model: {:?}",
|
||||
llm_provider_endpoint, selected_llm
|
||||
llm_provider_endpoint, determined_route
|
||||
);
|
||||
|
||||
if let Some(trace_parent) = trace_parent {
|
||||
|
|
@ -138,10 +138,10 @@ pub async fn chat_completions(
|
|||
);
|
||||
}
|
||||
|
||||
if let Some(selected_llm) = selected_llm {
|
||||
if let Some(selected_route) = determined_route {
|
||||
request_headers.insert(
|
||||
ARCH_PROVIDER_HINT_HEADER,
|
||||
header::HeaderValue::from_str(&selected_llm).unwrap(),
|
||||
header::HeaderValue::from_str(&selected_route).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,9 +44,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let _tracer_provider = init_tracer();
|
||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||
|
||||
info!(
|
||||
"current working directory: {}",
|
||||
env::current_dir().unwrap().display()
|
||||
);
|
||||
// loading arch_config.yaml file
|
||||
let arch_config_path =
|
||||
env::var("ARCH_CONFIG_PATH").unwrap_or_else(|_| "./arch_config.yaml".to_string());
|
||||
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
|
||||
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
|
||||
info!("Loading arch_config.yaml from {}", arch_config_path);
|
||||
|
||||
let config_contents =
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::{
|
||||
configuration::{LlmProvider, LlmRoute, ModelUsagePreference},
|
||||
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
|
||||
consts::ARCH_PROVIDER_HINT_HEADER,
|
||||
};
|
||||
use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message};
|
||||
|
|
@ -19,7 +19,6 @@ pub struct RouterService {
|
|||
router_model: Arc<dyn RouterModel>,
|
||||
routing_provider_name: String,
|
||||
llm_usage_defined: bool,
|
||||
llm_provider_map: HashMap<String, LlmProvider>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
|
@ -45,11 +44,14 @@ impl RouterService {
|
|||
) -> Self {
|
||||
let providers_with_usage = providers
|
||||
.iter()
|
||||
.filter(|provider| provider.usage.is_some())
|
||||
.filter(|provider| provider.routing_preferences.is_some())
|
||||
.cloned()
|
||||
.collect::<Vec<LlmProvider>>();
|
||||
|
||||
let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect();
|
||||
let llm_routes: Vec<RoutingPreference> = providers_with_usage
|
||||
.iter()
|
||||
.flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default())
|
||||
.collect();
|
||||
|
||||
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
|
||||
llm_routes,
|
||||
|
|
@ -57,18 +59,12 @@ impl RouterService {
|
|||
router_model_v1::MAX_TOKEN_LEN,
|
||||
));
|
||||
|
||||
let llm_provider_map: HashMap<String, LlmProvider> = providers
|
||||
.into_iter()
|
||||
.map(|provider| (provider.name.clone(), provider))
|
||||
.collect();
|
||||
|
||||
RouterService {
|
||||
router_url,
|
||||
client: reqwest::Client::new(),
|
||||
router_model,
|
||||
routing_provider_name,
|
||||
llm_usage_defined: !providers_with_usage.is_empty(),
|
||||
llm_provider_map,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -155,40 +151,21 @@ impl RouterService {
|
|||
if let Some(ContentType::Text(content)) =
|
||||
&chat_completion_response.choices[0].message.content
|
||||
{
|
||||
let mut selected_model: Option<String> = None;
|
||||
if let Some(selected_llm_name) = self.router_model.parse_response(content)? {
|
||||
if selected_llm_name != "other" {
|
||||
if let Some(usage_preferences) = usage_preferences {
|
||||
for usage in usage_preferences {
|
||||
if usage.name == selected_llm_name {
|
||||
selected_model = Some(usage.model);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if selected_model.is_none() {
|
||||
warn!(
|
||||
"Selected LLM model not found in usage preferences: {}",
|
||||
selected_llm_name
|
||||
);
|
||||
}
|
||||
} else if let Some(provider) = self.llm_provider_map.get(&selected_llm_name) {
|
||||
selected_model = provider.model.clone();
|
||||
} else {
|
||||
warn!(
|
||||
"Selected LLM model not found in provider map: {}",
|
||||
selected_llm_name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
let route_name = self.router_model.parse_response(content)?;
|
||||
info!(
|
||||
"router response: {}, selected_model: {:?}, response time: {}ms",
|
||||
content.replace("\n", "\\n"),
|
||||
selected_model,
|
||||
route_name,
|
||||
router_response_time.as_millis()
|
||||
);
|
||||
|
||||
Ok(selected_model)
|
||||
if let Some(ref route) = route_name {
|
||||
if route == "other" {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(route_name)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use common::{
|
||||
configuration::{LlmRoute, ModelUsagePreference},
|
||||
configuration::{ModelUsagePreference, RoutingPreference},
|
||||
consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
|
||||
};
|
||||
use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message};
|
||||
|
|
@ -36,7 +36,11 @@ pub struct RouterModelV1 {
|
|||
max_token_length: usize,
|
||||
}
|
||||
impl RouterModelV1 {
|
||||
pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self {
|
||||
pub fn new(
|
||||
llm_routes: Vec<RoutingPreference>,
|
||||
routing_model: String,
|
||||
max_token_length: usize,
|
||||
) -> Self {
|
||||
let llm_route_json_str =
|
||||
serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
|
||||
RouterModelV1 {
|
||||
|
|
@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 {
|
|||
let llm_route_json = usage_preferences
|
||||
.as_ref()
|
||||
.map(|prefs| {
|
||||
let llm_route: Vec<LlmRoute> = prefs
|
||||
let llm_route: Vec<RoutingPreference> = prefs
|
||||
.iter()
|
||||
.map(|pref| LlmRoute {
|
||||
.map(|pref| RoutingPreference {
|
||||
name: pref.name.clone(),
|
||||
description: pref.usage.clone().unwrap_or_default(),
|
||||
})
|
||||
|
|
@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||
|
||||
|
|
@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||
|
||||
|
|
@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
|
||||
|
||||
|
|
@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
|
||||
|
||||
|
|
@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
|
||||
|
||||
|
|
@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||
|
||||
|
|
@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
let routing_model = "test-model".to_string();
|
||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||
|
||||
|
|
@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
|||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||
]
|
||||
"#;
|
||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
||||
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||
|
||||
let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
|
||||
|
||||
|
|
|
|||
|
|
@ -187,24 +187,11 @@ pub struct ModelUsagePreference {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LlmRoute {
|
||||
pub struct RoutingPreference {
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
}
|
||||
|
||||
impl From<&LlmProvider> for LlmRoute {
|
||||
fn from(provider: &LlmProvider) -> Self {
|
||||
Self {
|
||||
name: provider.name.to_string(),
|
||||
description: provider
|
||||
.usage
|
||||
.as_ref()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "No description available".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
//TODO: use enum for model, but if there is a new model, we need to update the code
|
||||
pub struct LlmProvider {
|
||||
|
|
@ -218,6 +205,7 @@ pub struct LlmProvider {
|
|||
pub port: Option<u16>,
|
||||
pub rate_limits: Option<LlmRatelimit>,
|
||||
pub usage: Option<String>,
|
||||
pub routing_preferences: Option<Vec<RoutingPreference>>,
|
||||
}
|
||||
|
||||
pub trait IntoModels {
|
||||
|
|
@ -256,6 +244,7 @@ impl Default for LlmProvider {
|
|||
port: None,
|
||||
rate_limits: None,
|
||||
usage: None,
|
||||
routing_preferences: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -368,7 +357,7 @@ mod test {
|
|||
#[test]
|
||||
fn test_deserialize_configuration() {
|
||||
let ref_config = fs::read_to_string(
|
||||
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
|
||||
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
|
||||
)
|
||||
.expect("reference config file not found");
|
||||
|
||||
|
|
@ -429,7 +418,7 @@ mod test {
|
|||
#[test]
|
||||
fn test_tool_conversion() {
|
||||
let ref_config = fs::read_to_string(
|
||||
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
|
||||
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
|
||||
)
|
||||
.expect("reference config file not found");
|
||||
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();
|
||||
|
|
|
|||
|
|
@ -58,7 +58,16 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
|
|||
let name = llm_provider.name.clone();
|
||||
if llm_providers
|
||||
.providers
|
||||
.insert(name.clone(), llm_provider)
|
||||
.insert(name.clone(), llm_provider.clone())
|
||||
.is_some()
|
||||
{
|
||||
return Err(LlmProvidersNewError::DuplicateName(name));
|
||||
}
|
||||
|
||||
// also add model_id as key for provider lookup
|
||||
if llm_providers
|
||||
.providers
|
||||
.insert(llm_provider.model.clone().unwrap(), llm_provider)
|
||||
.is_some()
|
||||
{
|
||||
return Err(LlmProvidersNewError::DuplicateName(name));
|
||||
|
|
|
|||
|
|
@ -113,16 +113,10 @@ impl StreamContext {
|
|||
}
|
||||
|
||||
debug!(
|
||||
"request received: llm provider hint: {}, selected llm: {}, model: {}",
|
||||
"request received: llm provider hint: {}, selected provider: {}",
|
||||
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
||||
.unwrap_or_default(),
|
||||
self.llm_provider.as_ref().unwrap().name,
|
||||
self.llm_provider
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.model
|
||||
.as_ref()
|
||||
.unwrap_or(&String::new())
|
||||
self.llm_provider.as_ref().unwrap().name
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -349,7 +343,7 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
info!(
|
||||
"on_http_request_body: provider: {}, model requested: {}, model selected: {}",
|
||||
"on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}",
|
||||
self.llm_provider().name,
|
||||
model_requested,
|
||||
model_name.unwrap_or(&"None".to_string()),
|
||||
|
|
|
|||
|
|
@ -30,7 +30,10 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
|||
Some("x-arch-llm-provider-hint"),
|
||||
)
|
||||
.returning(None)
|
||||
.expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4"))
|
||||
.expect_log(
|
||||
Some(LogLevel::Debug),
|
||||
Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
|
||||
)
|
||||
.expect_add_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("x-arch-llm-provider"),
|
||||
|
|
@ -263,7 +266,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(incomplete_chat_completions_request_body))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
|
||||
.expect_send_local_response(
|
||||
Some(StatusCode::BAD_REQUEST.as_u16().into()),
|
||||
None,
|
||||
|
|
@ -429,7 +432,7 @@ fn llm_gateway_override_model_name() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_record("input_sequence_length", 29)
|
||||
|
|
@ -478,7 +481,7 @@ fn llm_gateway_override_use_default_model() {
|
|||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(
|
||||
Some(LogLevel::Info),
|
||||
Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"),
|
||||
Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
|
||||
)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -526,7 +529,7 @@ fn llm_gateway_override_use_model_name_none() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: none, model selected: gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_record("input_sequence_length", 29)
|
||||
|
|
|
|||
15
demos/samples_java/weather_forcecast_service/.vscode/launch.json
vendored
Normal file
15
demos/samples_java/weather_forcecast_service/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "java",
|
||||
"name": "WeatherForecastApplication",
|
||||
"request": "launch",
|
||||
"mainClass": "weather.WeatherForecastApplication",
|
||||
"projectName": "weather-forecast-service"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -9,10 +9,8 @@ listeners:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
default: true
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
|
|
|
|||
|
|
@ -8,10 +8,8 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
|
||||
endpoints:
|
||||
frankfurther_api:
|
||||
|
|
|
|||
|
|
@ -9,10 +9,8 @@ listeners:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
default: true
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
|
|
|
|||
|
|
@ -13,10 +13,8 @@ endpoints:
|
|||
connect_timeout: 0.005s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
default: true
|
||||
|
||||
system_prompt: |
|
||||
|
|
|
|||
|
|
@ -8,10 +8,8 @@ listeners:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
|
|
|
|||
|
|
@ -8,10 +8,8 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
|
||||
endpoints:
|
||||
twelvedata_api:
|
||||
|
|
|
|||
|
|
@ -17,15 +17,11 @@ overrides:
|
|||
prompt_target_intent_matching_threshold: 0.6
|
||||
|
||||
llm_providers:
|
||||
- name: groq
|
||||
access_key: $GROQ_API_KEY
|
||||
provider_interface: groq
|
||||
model: llama-3.2-3b-preview
|
||||
- access_key: $GROQ_API_KEY
|
||||
model: groq/llama-3.2-3b-preview
|
||||
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
system_prompt: |
|
||||
|
|
|
|||
|
|
@ -13,16 +13,12 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: openai
|
||||
model: ministral-3b-latest
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral/ministral-3b-latest
|
||||
```
|
||||
|
||||
### Step 2. Start arch gateway
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ You can also pass in a header to override model when sending prompt. Following e
|
|||
```bash
|
||||
|
||||
$ curl --header 'Content-Type: application/json' \
|
||||
--header 'x-arch-llm-provider-hint: ministral-3b' \
|
||||
--header 'x-arch-llm-provider-hint: mistral/ministral-3b' \
|
||||
--data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \
|
||||
http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
|
||||
{
|
||||
|
|
|
|||
|
|
@ -9,46 +9,34 @@ listeners:
|
|||
|
||||
llm_providers:
|
||||
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider_interface: mistral
|
||||
model: ministral-3b-latest
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral/ministral-3b-latest
|
||||
|
||||
- name: claude-sonnet
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
provider_interface: claude
|
||||
model: claude-3-7-sonnet-latest
|
||||
- access_key: $ANTHROPIC_API_KEY
|
||||
model: claude/claude-3-7-sonnet-latest
|
||||
|
||||
- name: claude-sonnet-4
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
provider_interface: claude
|
||||
model: claude-sonnet-4-0
|
||||
- access_key: $ANTHROPIC_API_KEY
|
||||
model: claude/claude-sonnet-4-0
|
||||
|
||||
- name: deepseek
|
||||
access_key: $DEEPSEEK_API_KEY
|
||||
provider_interface: deepseek
|
||||
model: deepseek-reasoner
|
||||
- access_key: $DEEPSEEK_API_KEY
|
||||
model: deepseek/deepseek-reasoner
|
||||
|
||||
- name: groq
|
||||
access_key: $GROQ_API_KEY
|
||||
provider_interface: groq
|
||||
model: llama-3.1-8b-instant
|
||||
- access_key: $GROQ_API_KEY
|
||||
model: groq/llama-3.1-8b-instant
|
||||
|
||||
- name: gemini
|
||||
access_key: $GEMINI_API_KEY
|
||||
provider_interface: gemini
|
||||
model: gemini-1.5-pro-latest
|
||||
- access_key: $GEMINI_API_KEY
|
||||
model: gemini/gemini-1.5-pro-latest
|
||||
|
||||
- model: custom/test-model
|
||||
base_url: http://host.docker.internal:11223
|
||||
provider_interface: openai
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -9,10 +9,9 @@ listeners:
|
|||
|
||||
llm_providers:
|
||||
|
||||
- name: local-llama
|
||||
- model: my_llm_provider/llama3.2
|
||||
provider_interface: openai
|
||||
model: llama3.2
|
||||
endpoint: host.docker.internal:11434
|
||||
base_url: http://host.docker.internal:11434
|
||||
default: true
|
||||
|
||||
system_prompt: |
|
||||
|
|
|
|||
|
|
@ -22,10 +22,8 @@ endpoints:
|
|||
connect_timeout: 0.005s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
default: true
|
||||
|
||||
system_prompt: |
|
||||
|
|
|
|||
|
|
@ -9,28 +9,21 @@ listeners:
|
|||
|
||||
llm_providers:
|
||||
|
||||
- name: gpt-4o-mini
|
||||
provider_interface: openai
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
|
||||
- name: gpt-4.1
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4.1
|
||||
default: true
|
||||
|
||||
- name: code_generation
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4.1
|
||||
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- name: code_understanding
|
||||
provider_interface: openai
|
||||
- model: openai/gpt-4.1
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
usage: understand and explain existing code snippets, functions, or libraries
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -14,32 +14,24 @@ listeners:
|
|||
llm_providers:
|
||||
|
||||
- name: arch-router
|
||||
provider_interface: arch
|
||||
model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
endpoint: host.docker.internal:11434
|
||||
model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
base_url: http://host.docker.internal:11434
|
||||
|
||||
- name: gpt-4o-mini
|
||||
provider_interface: openai
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
|
||||
- name: gpt-4.1
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4.1
|
||||
default: true
|
||||
|
||||
- name: code_generation
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4.1
|
||||
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- name: code_understanding
|
||||
provider_interface: openai
|
||||
- model: openai/gpt-4.1
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4.1
|
||||
usage: understand and explain existing code snippets, functions, or libraries
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@ POST http://localhost:12000/v1/chat/completions
|
|||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "openai/gpt-4.1",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
}
|
||||
],
|
||||
"model": "none"
|
||||
]
|
||||
}
|
||||
HTTP 200
|
||||
[Asserts]
|
||||
|
|
|
|||
|
|
@ -14,4 +14,4 @@ Content-Type: application/json
|
|||
HTTP 200
|
||||
[Asserts]
|
||||
header "content-type" matches /text\/event-stream/
|
||||
body matches /^data: .*?gpt-4.1.*?\n/
|
||||
body matches /^data: .*?gpt-4o-mini.*?\n/
|
||||
|
|
|
|||
|
|
@ -85,10 +85,8 @@ system_prompt: |
|
|||
Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks!
|
||||
|
||||
llm_providers:
|
||||
- name: openai
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
prompt_targets:
|
||||
|
|
|
|||
|
|
@ -9,10 +9,8 @@ listeners:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
|
|
|
|||
|
|
@ -50,10 +50,8 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
|
@ -153,16 +151,12 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider_interface: openai
|
||||
model: ministral-3b-latest
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral/ministral-3b-latest
|
||||
|
||||
Step 2. Start arch gateway
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
|
|||
|
|
@ -9,10 +9,8 @@ listeners:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
|
|
|
|||
|
|
@ -74,9 +74,6 @@ Below is an example to show how to set up a prompt target for the Arch Router:
|
|||
:caption: Route Config Example
|
||||
|
||||
|
||||
routing:
|
||||
model: archgw-v1-router-model
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -85,29 +82,22 @@ Below is an example to show how to set up a prompt target for the Arch Router:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: archgw-v1-router-model
|
||||
provider_interface: openai
|
||||
model: katanemo/Arch-Router-1.5B
|
||||
base_url: ...
|
||||
|
||||
- name: gpt-4o-mini
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o-mini
|
||||
default: true
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- name: code_generation
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
- name: code_understanding
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4.1
|
||||
usage: understand and explain existing code snippets, functions, or libraries
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
- model: openai/gpt-4.1
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
Example Use Cases
|
||||
-------------------------
|
||||
|
|
|
|||
|
|
@ -30,21 +30,16 @@ endpoints:
|
|||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
- name: OpenAI
|
||||
provider_interface: openai
|
||||
- name: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
model: openai/gpt-4o
|
||||
default: true
|
||||
|
||||
- name: Mistral8x7b
|
||||
provider_interface: openai
|
||||
access_key: $MISTRAL_API_KEY
|
||||
model: mistral-8x7b
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral/mistral-8x7b
|
||||
|
||||
- name: MistralLocal7b
|
||||
provider_interface: openai
|
||||
model: mistral-7b-instruct
|
||||
endpoint: mistral_local
|
||||
- model: mistral/mistral-7b-instruct
|
||||
base_url: http://mistral_local
|
||||
|
||||
# provides a way to override default settings for the arch system
|
||||
overrides:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
endpoints:
|
||||
app_server:
|
||||
connect_timeout: 0.005s
|
||||
endpoint: 127.0.0.1
|
||||
port: 80
|
||||
error_target:
|
||||
endpoint: error_target_1
|
||||
port: 80
|
||||
mistral_local:
|
||||
endpoint: 127.0.0.1
|
||||
port: 8001
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
message_format: openai
|
||||
port: 12000
|
||||
timeout: 5s
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
message_format: openai
|
||||
port: 10000
|
||||
timeout: 5s
|
||||
llm_providers:
|
||||
- access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
model: gpt-4o
|
||||
name: openai/gpt-4o
|
||||
provider_interface: openai
|
||||
- access_key: $MISTRAL_API_KEY
|
||||
model: mistral-8x7b
|
||||
name: mistral/mistral-8x7b
|
||||
provider_interface: mistral
|
||||
- base_url: http://mistral_local
|
||||
endpoint: mistral_local
|
||||
model: mistral-7b-instruct
|
||||
name: mistral/mistral-7b-instruct
|
||||
port: 80
|
||||
protocol: http
|
||||
provider_interface: mistral
|
||||
overrides:
|
||||
prompt_target_intent_matching_threshold: 0.6
|
||||
prompt_guards:
|
||||
input_guards:
|
||||
jailbreak:
|
||||
on_exception:
|
||||
message: Looks like you're curious about my abilities, but I can only provide
|
||||
assistance within my programmed parameters.
|
||||
prompt_targets:
|
||||
- auto_llm_dispatch_on_response: true
|
||||
default: true
|
||||
description: handel all scenarios that are question and answer in nature. Like summarization,
|
||||
information extraction, etc.
|
||||
endpoint:
|
||||
http_method: POST
|
||||
name: app_server
|
||||
path: /agent/summary
|
||||
name: information_extraction
|
||||
system_prompt: You are a helpful information extraction assistant. Use the information
|
||||
that is provided to you.
|
||||
- description: Reboot a specific network device
|
||||
endpoint:
|
||||
name: app_server
|
||||
path: /agent/action
|
||||
name: reboot_network_device
|
||||
parameters:
|
||||
- description: Identifier of the network device to reboot.
|
||||
name: device_id
|
||||
required: true
|
||||
type: str
|
||||
- default: false
|
||||
description: Confirmation flag to proceed with reboot.
|
||||
enum:
|
||||
- true
|
||||
- false
|
||||
name: confirmation
|
||||
type: bool
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers
|
||||
or purchasing decisions.
|
||||
tracing:
|
||||
sampling_rate: 0.1
|
||||
version: v0.1
|
||||
1
model_server/.vscode/launch.json
vendored
1
model_server/.vscode/launch.json
vendored
|
|
@ -4,6 +4,7 @@
|
|||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "model server",
|
||||
"type": "debugpy",
|
||||
|
|
|
|||
7
model_server/.vscode/settings.json
vendored
Normal file
7
model_server/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"python.testing.pytestArgs": [
|
||||
"."
|
||||
],
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true
|
||||
}
|
||||
15
tests/archgw/.vscode/launch.json
vendored
Normal file
15
tests/archgw/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
7
tests/archgw/.vscode/settings.json
vendored
Normal file
7
tests/archgw/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"python.testing.pytestArgs": [
|
||||
"."
|
||||
],
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true
|
||||
}
|
||||
|
|
@ -13,21 +13,15 @@ endpoints:
|
|||
connect_timeout: 0.005s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o-mini
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
default: true
|
||||
|
||||
- name: gpt-3.5-turbo-0125
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-3.5-turbo-0125
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-3.5-turbo-0125
|
||||
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
|
|
|||
15
tests/e2e/.vscode/launch.json
vendored
Normal file
15
tests/e2e/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
7
tests/e2e/.vscode/settings.json
vendored
Normal file
7
tests/e2e/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"python.testing.pytestArgs": [
|
||||
"."
|
||||
],
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true
|
||||
}
|
||||
15
tests/modelserver/.vscode/launch.json
vendored
Normal file
15
tests/modelserver/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
7
tests/modelserver/.vscode/settings.json
vendored
Normal file
7
tests/modelserver/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"python.testing.pytestArgs": [
|
||||
"."
|
||||
],
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue