diff --git a/.github/workflows/arch_tools_tests.yml b/.github/workflows/arch_tools_tests.yml new file mode 100644 index 00000000..da845b20 --- /dev/null +++ b/.github/workflows/arch_tools_tests.yml @@ -0,0 +1,40 @@ +name: arch tools tests + +permissions: + contents: read + +on: + push: + branches: + - main + pull_request: + +jobs: + arch_tools_tests: + runs-on: ubuntu-latest-m + defaults: + run: + working-directory: ./arch/tools + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.12" + + - name: install poetry + run: | + export POETRY_VERSION=1.8.5 + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: install arch tools + run: | + poetry install + + - name: run tests + run: | + poetry run pytest diff --git a/.gitignore b/.gitignore index dc20274c..f155cd1a 100644 --- a/.gitignore +++ b/.gitignore @@ -102,13 +102,10 @@ venv.bak/ # mypy .mypy_cache/ -# VSCode stuff: -.vscode/ - # MacOS Metadata *.DS_Store - +*.yaml_rendered # ========================================= diff --git a/README.md b/README.md index d87b2737..836f5315 100644 --- a/README.md +++ b/README.md @@ -104,10 +104,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. @@ -204,16 +202,12 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: mistral-3b - access_key: $MISTRAL_API_KEY - provider: openai - model: mistral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/mistral-3b-latest ``` #### Preference-based Routing @@ -230,17 +224,18 @@ listeners: timeout: 30s llm_providers: - - name: code_generation + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + default: true + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries ``` Arch uses a lightweight 1.5B autoregressive model to map prompts (and conversation context) to these policies. This approach adapts to intent drift, supports multi-turn conversations, and avoids the brittleness of embedding-based classifiers or manual if/else chains. No retraining is required when adding new models or updating policies — routing is governed entirely by human-readable rules.
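Because Arch's egress listener speaks the OpenAI chat-completions protocol, any OpenAI-compatible client can exercise these routing preferences directly. A minimal sketch, assuming the gateway runs locally on port 12000 (as in the demo configs) and the `openai` Python package is installed; `model="none"` mirrors the override example in the llm_routing demo and defers model selection to the router:

```python
# Minimal client sketch against a locally running Arch gateway (assumed at
# localhost:12000). The router model, not the client, picks the provider
# whose routing_preferences best match the prompt.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:12000/v1", api_key="unused")  # gateway holds the real keys

resp = client.chat.completions.create(
    model="none",  # defer model selection to Arch's preference-based routing
    messages=[{"role": "user", "content": "Write a function that merges two sorted lists."}],
)
print(resp.choices[0].message.content)
```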
You can learn more about the design, benchmarks, and methodology behind preference-based routing in our paper: diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 4212674c..9e9abac8 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -66,11 +66,16 @@ properties: properties: name: type: string - # provider field is deprecated, use provider_interface instead - provider: + access_key: + type: string + model: + type: string + default: + type: boolean + base_url: + type: string + http_host: type: string - enum: - - openai provider_interface: type: string enum: @@ -81,29 +86,22 @@ properties: - mistral - openai - gemini - access_key: - type: string - model: - type: string - default: - type: boolean - # endpoint field is deprecated, use base_url instead - endpoint: - type: string - base_url: - type: string - protocol: - type: string - enum: - - http - - https - http_host: - type: string - usage: - type: string + routing_preferences: + type: array + items: + type: object + properties: + name: + type: string + description: + type: string + additionalProperties: false + required: + - name + - description additionalProperties: false required: - - name + - model overrides: type: object properties: diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 4f4249fb..8346acc0 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -5,16 +5,16 @@ import yaml from jsonschema import validate from urllib.parse import urlparse -ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( - "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" -) -ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml") -ENVOY_CONFIG_FILE_RENDERED = os.getenv( - "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml" -) -ARCH_CONFIG_SCHEMA_FILE = os.getenv( - "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml" -) + +SUPPORTED_PROVIDERS = [ + "arch", + "claude", + "deepseek", + "groq", + "mistral", + "openai", + "gemini", +] def get_endpoint_and_port(endpoint, protocol): @@ -32,8 +32,22 @@ def get_endpoint_and_port(endpoint, protocol): def validate_and_render_schema(): - env = Environment(loader=FileSystemLoader("./")) - template = env.get_template("envoy.template.yaml") + ENVOY_CONFIG_TEMPLATE_FILE = os.getenv( + "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml" + ) + ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml") + ARCH_CONFIG_FILE_RENDERED = os.getenv( + "ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml" + ) + ENVOY_CONFIG_FILE_RENDERED = os.getenv( + "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml" + ) + ARCH_CONFIG_SCHEMA_FILE = os.getenv( + "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml" + ) + + env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./"))) + template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE) try: validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE) @@ -82,6 +96,8 @@ def validate_and_render_schema(): updated_llm_providers = [] llm_provider_name_set = set() llms_with_usage = [] + model_name_keys = set() + model_usage_name_keys = set() for llm_provider in config_yaml["llm_providers"]: if llm_provider.get("usage", None): llms_with_usage.append(llm_provider["name"]) @@ -89,10 +105,52 @@ def validate_and_render_schema(): raise Exception( f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider" ) - if llm_provider.get("name") is None: + + model_name = 
llm_provider.get("model") + if model_name in model_name_keys: raise Exception( - f"llm_provider name is required, please provide name for llm_provider" + f"Duplicate model name {model_name}, please provide unique model name for each llm_provider" ) + model_name_keys.add(model_name) + if llm_provider.get("name") is None: + llm_provider["name"] = model_name + + model_name_tokens = model_name.split("/") + if len(model_name_tokens) < 2: + raise Exception( + f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>." + ) + provider = model_name_tokens[0] + model_id = "/".join(model_name_tokens[1:]) + if provider not in SUPPORTED_PROVIDERS: + if ( + llm_provider.get("base_url", None) is None + or llm_provider.get("provider_interface", None) is None + ): + raise Exception( + f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}" + ) + provider = llm_provider.get("provider_interface", None) + elif llm_provider.get("provider_interface", None) is not None: + raise Exception( + f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' " + ) + + if model_id in model_name_keys: + raise Exception( + f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider" + ) + model_name_keys.add(model_id) + + for routing_preference in llm_provider.get("routing_preferences", []): + if routing_preference.get("name") in model_usage_name_keys: + raise Exception( + f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference" + ) + model_usage_name_keys.add(routing_preference.get("name")) + + llm_provider["model"] = model_id + llm_provider["provider_interface"] = provider llm_provider_name_set.add(llm_provider.get("name")) provider = None if llm_provider.get("provider") and llm_provider.get("provider_interface"): @@ -105,21 +163,14 @@ def validate_and_render_schema(): del llm_provider["provider"] updated_llm_providers.append(llm_provider) - if llm_provider.get("endpoint") and llm_provider.get("base_url"): - raise Exception("Please provide either endpoint or base_url, not both") - - if llm_provider.get("endpoint", None): - endpoint = llm_provider["endpoint"] - protocol = llm_provider.get("protocol", "http") - llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port( - endpoint, protocol - ) - llms_with_endpoint.append(llm_provider) - elif llm_provider.get("base_url", None): + if llm_provider.get("base_url", None): base_url = llm_provider["base_url"] urlparse_result = urlparse(base_url) - if llm_provider.get("port"): - raise Exception("Please provider port in base_url") + url_path = urlparse_result.path + if url_path and url_path != "/": + raise Exception( + f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
+ ) if urlparse_result.scheme == "" or urlparse_result.scheme not in [ "http", "https", @@ -140,7 +191,7 @@ def validate_and_render_schema(): llm_provider["protocol"] = protocol llms_with_endpoint.append(llm_provider) - if len(llms_with_usage) > 0: + if len(model_usage_name_keys) > 0: routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None) if routing_llm_provider and routing_llm_provider not in llm_provider_name_set: raise Exception( @@ -198,6 +249,7 @@ def validate_and_render_schema(): agent_orchestrator = list(endpoints.keys())[0] print("agent_orchestrator: ", agent_orchestrator) + data = { "prompt_gateway_listener": prompt_gateway_listener, "llm_gateway_listener": llm_gateway_listener, @@ -216,6 +268,9 @@ def validate_and_render_schema(): with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file: file.write(rendered) + with open(ARCH_CONFIG_FILE_RENDERED, "w") as file: + file.write(arch_config_string) + def validate_prompt_config(arch_config_file, arch_config_schema_file): with open(arch_config_file, "r") as file: @@ -231,7 +286,7 @@ def validate_prompt_config(arch_config_file, arch_config_schema_file): validate(config_yaml, config_schema_yaml) except Exception as e: print( - f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e.message}" + f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e}" ) raise e diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index e530591c..1f55d718 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -57,6 +57,34 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "exceptiongroup" +version = "1.3.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -179,6 +207,69 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +files = [ + {file = 
"pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pytest" +version = "8.4.1" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -430,6 +521,47 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = 
"tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -444,4 +576,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7d17c2f24f8ad4503a5248c3f7e1a74f458a5ea55a2fb63db382cb6abe6d52df" +content-hash = "83d32fa807f6c7058ecbfc43b777c4d4c637695025cf774ff10532bff8f6712b" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index af89fc31..dda97235 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -20,6 +20,13 @@ pyyaml = "^6.0.2" [tool.poetry.scripts] archgw = "cli.main:main" +[tool.poetry.group.dev.dependencies] +pytest = "^8.4.1" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" + + +[tool.pytest.ini_options] +addopts = ["-v"] diff --git a/arch/tools/test/fastapi_test.py b/arch/tools/test/fastapi_test.py deleted file mode 100644 index bedac8bd..00000000 --- a/arch/tools/test/fastapi_test.py +++ /dev/null @@ -1,45 +0,0 @@ -from fastapi import FastAPI -from pydantic import BaseModel -from typing import List, Dict, Set - -app = FastAPI() - - -class User(BaseModel): - name: str = Field( - "John Doe", description="The name of the user." - ) # Default value and description for name - location: int = None - age: int = Field( - 30, description="The age of the user." - ) # Default value and description for age - tags: Set[str] = Field( - default_factory=set, description="A set of tags associated with the user." - ) # Default empty set and description for tags - metadata: Dict[str, int] = Field( - default_factory=dict, - description="A dictionary storing metadata about the user, with string keys and integer values.", - ) # Default empty dict and description for metadata - - -@app.get("/agent/default") -async def default(request: User): - """ - This endpoint handles information extraction queries. - It can summarize, extract details, and perform various other information-related tasks. - """ - return {"info": f"Query: {request.name}, Count: {request.age}"} - - -@app.post("/agent/action") -async def reboot_network_device(device_id: str, confirmation: str): - """ - This endpoint reboots a network device based on the device ID. - Confirmation is required to proceed with the reboot. - - Args: - device_id: The device_id that you want to reboot. - confirmation: The confirmation that the user wants to reboot. - metadata: Ignore this parameter - """ - return {"status": "Device rebooted", "device_id": device_id} diff --git a/arch/tools/test/fastapi_test_prompt_targets.yml b/arch/tools/test/fastapi_test_prompt_targets.yml deleted file mode 100644 index 7fb9d118..00000000 --- a/arch/tools/test/fastapi_test_prompt_targets.yml +++ /dev/null @@ -1,33 +0,0 @@ -prompt_targets: -- name: default - path: /agent/default - description: "This endpoint handles information extraction queries.\n It can\ - \ summarize, extract details, and perform various other information-related tasks." 
- parameters: - - name: query - type: str - description: Field from Pydantic model DefaultRequest - default_value: null - required: false - - name: count - type: int - description: Field from Pydantic model DefaultRequest - default_value: null - required: false - type: default - auto-llm-dispatch-on-response: true -- name: reboot_network_device - path: /agent/action - description: "This endpoint reboots a network device based on the device ID.\n \ - \ Confirmation is required to proceed with the reboot." - parameters: - - name: device_id - type: str - description: Description for device_id - default_value: '' - required: true - - name: confirmation - type: int - description: Description for confirmation - default_value: '' - required: true diff --git a/arch/tools/test/test_cli.py b/arch/tools/test/test_cli.py deleted file mode 100644 index 0f7cb56c..00000000 --- a/arch/tools/test/test_cli.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest -from click.testing import CliRunner -from tools.cli.main import main # Import your CLI's entry point -import importlib.metadata - - -def get_version(): - """Helper function to fetch the version.""" - try: - version = importlib.metadata.version("archgw") - return version - except importlib.metadata.PackageNotFoundError: - return None - - -@pytest.fixture -def runner(): - """Fixture to create a Click test runner.""" - return CliRunner() - - -def test_version_option(runner): - """Test the --version option.""" - result = runner.invoke(main, ["--version"]) - assert result.exit_code == 0 - expected_version = get_version() - assert f"archgw cli version: {expected_version}" in result.output - - -def test_default_behavior(runner): - """Test the default behavior when no command is provided.""" - result = runner.invoke(main) - assert result.exit_code == 0 - assert "Arch (The Intelligent Prompt Gateway) CLI" in result.output - assert "Usage:" in result.output # Ensure help text is shown - - -def test_invalid_command(runner): - """Test that an invalid command returns an appropriate error message.""" - result = runner.invoke(main, ["invalid_command"]) - assert result.exit_code != 0 # Non-zero exit code for invalid command - assert "Error: No such command 'invalid_command'" in result.output diff --git a/arch/tools/test/test_config_generator.py b/arch/tools/test/test_config_generator.py new file mode 100644 index 00000000..0e4f8446 --- /dev/null +++ b/arch/tools/test/test_config_generator.py @@ -0,0 +1,272 @@ +import pytest +from unittest import mock +import sys +from cli.config_generator import validate_and_render_schema + +# Patch sys.path to allow import from cli/ +import os + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "cli")) +) + + +@pytest.fixture(autouse=True) +def cleanup_env(monkeypatch): + # Clean up environment variables and mocks after each test + yield + monkeypatch.undo() + + +def test_validate_and_render_happy_path(monkeypatch): + monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml") + monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml") + monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml") + monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml") + monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml") + monkeypatch.setenv("TEMPLATE_ROOT", "../") + + arch_config = """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: 
openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + +tracing: + random_sampling: 100 +""" + arch_config_schema = "" + with open("../arch_config_schema.yaml", "r") as file: + arch_config_schema = file.read() + + m_open = mock.mock_open() + # Provide enough file handles for all open() calls in validate_and_render_schema + m_open.side_effect = [ + mock.mock_open(read_data="").return_value, + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write) + mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write) + ] + with mock.patch("builtins.open", m_open): + with mock.patch("config_generator.Environment"): + validate_and_render_schema() + + +arch_config_test_cases = [ + { + "id": "duplicate_provider_name", + "expected_error": "Duplicate llm_provider name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - name: test1 + model: openai/gpt-4o + access_key: $OPENAI_API_KEY + + - name: test1 + model: openai/gpt-4o + access_key: $OPENAI_API_KEY + +""", + }, + { + "id": "provider_interface_with_model_id", + "expected_error": "Please provide provider interface as part of model name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + provider_interface: openai + +""", + }, + { + "id": "duplicate_model_id", + "expected_error": "Duplicate model_id", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + + - model: mistral/gpt-4o + +""", + }, + { + "id": "custom_provider_base_url", + "expected_error": "Must provide base_url and provider_interface", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: custom/gpt-4o + +""", + }, + { + "id": "base_url_no_prefix", + "expected_error": "Please provide base_url without path", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: custom/gpt-4o + base_url: "http://custom.com/test" + provider_interface: openai + +""", + }, + { + "id": "duplicate_routing_preference_name", + "expected_error": "Duplicate routing preference name", + "arch_config": """ +version: v0.1.0 + +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + +llm_providers: + + - model: openai/gpt-4o-mini +
access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + +tracing: + random_sampling: 100 + +""", + }, +] + + +@pytest.mark.parametrize( + "arch_config_test_case", + arch_config_test_cases, + ids=[case["id"] for case in arch_config_test_cases], +) +def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case): + monkeypatch.setenv("ARCH_CONFIG_FILE", "fake_arch_config.yaml") + monkeypatch.setenv("ARCH_CONFIG_SCHEMA_FILE", "fake_arch_config_schema.yaml") + monkeypatch.setenv("ENVOY_CONFIG_TEMPLATE_FILE", "./envoy.template.yaml") + monkeypatch.setenv("ARCH_CONFIG_FILE_RENDERED", "fake_arch_config_rendered.yaml") + monkeypatch.setenv("ENVOY_CONFIG_FILE_RENDERED", "fake_envoy.yaml") + monkeypatch.setenv("TEMPLATE_ROOT", "../") + + arch_config = arch_config_test_case["arch_config"] + expected_error = arch_config_test_case["expected_error"] + test_id = arch_config_test_case["id"] + + arch_config_schema = "" + with open("../arch_config_schema.yaml", "r") as file: + arch_config_schema = file.read() + + m_open = mock.mock_open() + # Provide enough file handles for all open() calls in validate_and_render_schema + m_open.side_effect = [ + mock.mock_open(read_data="").return_value, + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open(read_data=arch_config).return_value, # ARCH_CONFIG_FILE + mock.mock_open( + read_data=arch_config_schema + ).return_value, # ARCH_CONFIG_SCHEMA_FILE + mock.mock_open().return_value, # ENVOY_CONFIG_FILE_RENDERED (write) + mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write) + ] + with mock.patch("builtins.open", m_open): + with mock.patch("config_generator.Environment"): + with pytest.raises(Exception) as excinfo: + validate_and_render_schema() + assert expected_error in str(excinfo.value) diff --git a/arch/validate_arch_config.sh b/arch/validate_arch_config.sh index a3822e90..493d1b2f 100644 --- a/arch/validate_arch_config.sh +++ b/arch/validate_arch_config.sh @@ -3,11 +3,22 @@ failed_files=() for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do - echo "Validating $file..." - if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then + echo "Validating ${file}..." + touch $(pwd)/${file}_rendered + if ! docker run --rm -v "$(pwd)/${file}:/app/arch_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/arch_config_rendered.yaml:rw" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then echo "Validation failed for $file" failed_files+=("$file") fi + RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/') + if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then + echo "Checking rendered file against checked-in version..." + if ! 
diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then + echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}" + failed_files+=("${file}_rendered") + else + echo "Rendered file matches checked-in version." + fi + fi done # Print summary of failed files diff --git a/crates/.vscode/launch.json b/crates/.vscode/launch.json new file mode 100644 index 00000000..56a29b46 --- /dev/null +++ b/crates/.vscode/launch.json @@ -0,0 +1,21 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug Brightstaff", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/target/debug/brightstaff", + "args": [], + "cwd": "${workspaceFolder}", + "stopOnEntry": false, + "sourceLanguages": ["rust"], + "env": { + "RUST_LOG": "debug", + "RUST_BACKTRACE": "1", + "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml" + }, + "preLaunchTask": "rust: cargo build" + } + ] +} diff --git a/crates/.vscode/tasks.json b/crates/.vscode/tasks.json new file mode 100644 index 00000000..8d648bc7 --- /dev/null +++ b/crates/.vscode/tasks.json @@ -0,0 +1,21 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cargo", + "command": "build", + "args": [ + "--bin", + "brightstaff" + ], + "problemMatcher": [ + "$rustc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "label": "rust: cargo build" + } + ] +} diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs index 55f6d475..89c9ee13 100644 --- a/crates/brightstaff/src/handlers/chat_completions.rs +++ b/crates/brightstaff/src/handlers/chat_completions.rs @@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode}; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tokio_stream::StreamExt; -use tracing::{debug, info, trace, warn}; +use tracing::{debug, info, warn}; use crate::router::llm_router::RouterService; @@ -81,8 +81,8 @@ pub async fn chat_completions( } } - trace!( - "arch-router request body: {}", + debug!( + "arch-router request received: {}", &serde_json::to_string(&chat_completion_request).unwrap() ); @@ -102,9 +102,9 @@ pub async fn chat_completions( .as_ref() .and_then(|s| serde_yaml::from_str(s).ok()); - debug!("usage preferences: {:?}", usage_preferences); + debug!("usage preferences from request: {:?}", usage_preferences); - let mut selected_llm = match router_service + let mut determined_route = match router_service .determine_route( &chat_completion_request.messages, trace_parent.clone(), @@ -121,14 +121,14 @@ pub async fn chat_completions( } }; - if selected_llm.is_none() { + if determined_route.is_none() { debug!("No LLM model selected, using default from request"); - selected_llm = Some(chat_completion_request.model.clone()); + determined_route = Some(chat_completion_request.model.clone()); } info!( "sending request to llm provider: {} with llm model: {:?}", - llm_provider_endpoint, selected_llm + llm_provider_endpoint, determined_route ); if let Some(trace_parent) = trace_parent { @@ -138,10 +138,10 @@ pub async fn chat_completions( ); } - if let Some(selected_llm) = selected_llm { + if let Some(selected_route) = determined_route { request_headers.insert( ARCH_PROVIDER_HINT_HEADER, - header::HeaderValue::from_str(&selected_llm).unwrap(), + header::HeaderValue::from_str(&selected_route).unwrap(), ); } diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 05944a5f..4e4f18b7 100644 --- 
a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -44,9 +44,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { let _tracer_provider = init_tracer(); let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string()); + info!( + "current working directory: {}", + env::current_dir().unwrap().display() + ); // loading arch_config.yaml file - let arch_config_path = - env::var("ARCH_CONFIG_PATH").unwrap_or_else(|_| "./arch_config.yaml".to_string()); + let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED") + .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string()); info!("Loading arch_config.yaml from {}", arch_config_path); let config_contents = diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs index d4173b01..c1320c66 100644 --- a/crates/brightstaff/src/router/llm_router.rs +++ b/crates/brightstaff/src/router/llm_router.rs @@ -1,7 +1,7 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use common::{ - configuration::{LlmProvider, LlmRoute, ModelUsagePreference}, + configuration::{LlmProvider, ModelUsagePreference, RoutingPreference}, consts::ARCH_PROVIDER_HINT_HEADER, }; use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message}; @@ -19,7 +19,6 @@ pub struct RouterService { router_model: Arc, routing_provider_name: String, llm_usage_defined: bool, - llm_provider_map: HashMap<String, LlmProvider>, } #[derive(Debug, Error)] @@ -45,11 +44,14 @@ impl RouterService { ) -> Self { let providers_with_usage = providers .iter() - .filter(|provider| provider.usage.is_some()) + .filter(|provider| provider.routing_preferences.is_some()) .cloned() .collect::<Vec<_>>(); - let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect(); + let llm_routes: Vec<RoutingPreference> = providers_with_usage + .iter() + .flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default()) + .collect(); let router_model = Arc::new(router_model_v1::RouterModelV1::new( llm_routes, @@ -57,18 +59,12 @@ impl RouterService { router_model_v1::MAX_TOKEN_LEN, )); - let llm_provider_map: HashMap<String, LlmProvider> = providers - .into_iter() - .map(|provider| (provider.name.clone(), provider)) - .collect(); - RouterService { router_url, client: reqwest::Client::new(), router_model, routing_provider_name, llm_usage_defined: !providers_with_usage.is_empty(), - llm_provider_map, } } @@ -155,40 +151,21 @@ impl RouterService { if let Some(ContentType::Text(content)) = &chat_completion_response.choices[0].message.content { - let mut selected_model: Option<String> = None; - if let Some(selected_llm_name) = self.router_model.parse_response(content)?
{ - if selected_llm_name != "other" { - if let Some(usage_preferences) = usage_preferences { - for usage in usage_preferences { - if usage.name == selected_llm_name { - selected_model = Some(usage.model); - break; - } - } - if selected_model.is_none() { - warn!( - "Selected LLM model not found in usage preferences: {}", - selected_llm_name - ); - } - } else if let Some(provider) = self.llm_provider_map.get(&selected_llm_name) { - selected_model = provider.model.clone(); - } else { - warn!( - "Selected LLM model not found in provider map: {}", - selected_llm_name - ); - } - } - } + let route_name = self.router_model.parse_response(content)?; info!( "router response: {}, selected_model: {:?}, response time: {}ms", content.replace("\n", "\\n"), - selected_model, + route_name, router_response_time.as_millis() ); - Ok(selected_model) + if let Some(ref route) = route_name { + if route == "other" { + return Ok(None); + } + } + + Ok(route_name) } else { Ok(None) } diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs index e6ccd912..0dcefff6 100644 --- a/crates/brightstaff/src/router/router_model_v1.rs +++ b/crates/brightstaff/src/router/router_model_v1.rs @@ -1,5 +1,5 @@ use common::{ - configuration::{LlmRoute, ModelUsagePreference}, + configuration::{ModelUsagePreference, RoutingPreference}, consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE}, }; use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message}; @@ -36,7 +36,11 @@ pub struct RouterModelV1 { max_token_length: usize, } impl RouterModelV1 { - pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self { + pub fn new( + llm_routes: Vec<RoutingPreference>, + routing_model: String, + max_token_length: usize, + ) -> Self { let llm_route_json_str = serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string()); RouterModelV1 { @@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 { let llm_route_json = usage_preferences .as_ref() .map(|prefs| { - let llm_route: Vec<LlmRoute> = prefs + let llm_route: Vec<RoutingPreference> = prefs .iter() - .map(|pref| LlmRoute { + .map(|pref| RoutingPreference { name: pref.name.clone(), description: pref.usage.clone().unwrap_or_default(), }) @@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router =
RouterModelV1::new(llm_routes, routing_model.clone(), 235); @@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200); @@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230); @@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let routing_model = "test-model".to_string(); let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX); @@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y {"name": "Speech Recognition", "description": "Converting spoken language into written text"} ] "#; - let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap(); + let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap(); let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000); diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index d92f38fb..0693c09b 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -187,24 +187,11 @@ pub struct ModelUsagePreference { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct LlmRoute { +pub struct RoutingPreference { pub name: String, pub description: String, } -impl From<&LlmProvider> for LlmRoute { - fn from(provider: &LlmProvider) -> Self { - Self { - name: provider.name.to_string(), - description: provider - .usage - .as_ref() - .cloned() - .unwrap_or_else(|| "No description available".to_string()), - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { @@ -218,6 +205,7 @@ pub struct LlmProvider { pub port: Option, pub rate_limits: Option, pub usage: Option<String>, + pub routing_preferences: Option<Vec<RoutingPreference>>, } pub trait IntoModels { @@ -256,6 +244,7 @@ impl Default for LlmProvider { port: None, rate_limits: None, usage: None, + routing_preferences: None, } } } @@ -368,7 +357,7 @@ mod test { #[test] fn test_deserialize_configuration() { let ref_config = fs::read_to_string( -
"../../docs/source/resources/includes/arch_config_full_reference.yaml", + "../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml", ) .expect("reference config file not found"); @@ -429,7 +418,7 @@ mod test { #[test] fn test_tool_conversion() { let ref_config = fs::read_to_string( - "../../docs/source/resources/includes/arch_config_full_reference.yaml", + "../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml", ) .expect("reference config file not found"); let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap(); diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index 8214f148..120be691 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -58,7 +58,16 @@ impl TryFrom> for LlmProviders { let name = llm_provider.name.clone(); if llm_providers .providers - .insert(name.clone(), llm_provider) + .insert(name.clone(), llm_provider.clone()) + .is_some() + { + return Err(LlmProvidersNewError::DuplicateName(name)); + } + + // also add model_id as key for provider lookup + if llm_providers + .providers + .insert(llm_provider.model.clone().unwrap(), llm_provider) .is_some() { return Err(LlmProvidersNewError::DuplicateName(name)); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 2fa29496..82b88509 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -113,16 +113,10 @@ impl StreamContext { } debug!( - "request received: llm provider hint: {}, selected llm: {}, model: {}", + "request received: llm provider hint: {}, selected provider: {}", self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) .unwrap_or_default(), - self.llm_provider.as_ref().unwrap().name, - self.llm_provider - .as_ref() - .unwrap() - .model - .as_ref() - .unwrap_or(&String::new()) + self.llm_provider.as_ref().unwrap().name ); } @@ -349,7 +343,7 @@ impl HttpContext for StreamContext { }; info!( - "on_http_request_body: provider: {}, model requested: {}, model selected: {}", + "on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}", self.llm_provider().name, model_requested, model_name.unwrap_or(&"None".to_string()), diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 80c5e5da..108ab1ce 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -30,7 +30,10 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { Some("x-arch-llm-provider-hint"), ) .returning(None) - .expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: default, selected llm: open-ai-gpt-4, model: gpt-4")) + .expect_log( + Some(LogLevel::Debug), + Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"), + ) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider"), @@ -263,7 +266,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) 
.expect_send_local_response( Some(StatusCode::BAD_REQUEST.as_u16().into()), None, @@ -429,7 +432,7 @@ fn llm_gateway_override_model_name() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) @@ -478,7 +481,7 @@ fn llm_gateway_override_use_default_model() { // The actual call is not important in this test, we just need to grab the token_id .expect_log( Some(LogLevel::Info), - Some("on_http_request_body: provider: open-ai-gpt-4, model requested: gpt-1, model selected: gpt-4"), + Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"), ) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) @@ -526,7 +529,7 @@ fn llm_gateway_override_use_model_name_none() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested: none, model selected: gpt-4")) + .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4")) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_metric_record("input_sequence_length", 29) diff --git a/demos/samples_java/weather_forcecast_service/.vscode/launch.json b/demos/samples_java/weather_forcecast_service/.vscode/launch.json new file mode 100644 index 00000000..a9232a53 --- /dev/null +++ b/demos/samples_java/weather_forcecast_service/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "java", + "name": "WeatherForecastApplication", + "request": "launch", + "mainClass": "weather.WeatherForecastApplication", + "projectName": "weather-forecast-service" + } + ] +} diff --git a/demos/samples_java/weather_forcecast_service/arch_config.yaml b/demos/samples_java/weather_forcecast_service/arch_config.yaml index d11aaa96..68c3841d 100644 --- a/demos/samples_java/weather_forcecast_service/arch_config.yaml +++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. 
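All of these demo configs are being migrated to the same convention: the separate `name`, `provider_interface`, and `model` keys collapse into a single `model: <provider>/<model_id>` entry. A minimal sketch of the split that `config_generator.py` above performs (the helper name here is illustrative, not part of the codebase):

```python
# Illustrative sketch of the <provider>/<model_id> convention; mirrors the
# split logic in arch/tools/cli/config_generator.py.
SUPPORTED_PROVIDERS = ["arch", "claude", "deepseek", "groq", "mistral", "openai", "gemini"]

def split_model_name(model_name: str) -> tuple[str, str]:
    provider, sep, model_id = model_name.partition("/")
    if not sep or not model_id:
        raise ValueError(f"expected <provider>/<model_id>, got {model_name!r}")
    return provider, model_id

# The model id itself may contain slashes, e.g. Ollama-hosted GGUF models:
assert split_model_name("openai/gpt-4o-mini") == ("openai", "gpt-4o-mini")
assert split_model_name("arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M") == (
    "arch",
    "hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M",
)

# Unknown prefixes (e.g. "custom/...") are accepted only when base_url and
# provider_interface are given explicitly, as in the llm_routing demo config.
```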
diff --git a/demos/samples_python/currency_exchange/arch_config.yaml b/demos/samples_python/currency_exchange/arch_config.yaml index a9c0bce0..1c399449 100644 --- a/demos/samples_python/currency_exchange/arch_config.yaml +++ b/demos/samples_python/currency_exchange/arch_config.yaml @@ -8,10 +8,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o endpoints: frankfurther_api: diff --git a/demos/samples_python/human_resources_agent/arch_config.yaml b/demos/samples_python/human_resources_agent/arch_config.yaml index f46a6c7a..2a96a6d5 100644 --- a/demos/samples_python/human_resources_agent/arch_config.yaml +++ b/demos/samples_python/human_resources_agent/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. diff --git a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml index 706a374b..a29622ec 100644 --- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml +++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml @@ -13,10 +13,8 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true system_prompt: | diff --git a/demos/samples_python/network_switch_operator_agent/arch_config.yaml b/demos/samples_python/network_switch_operator_agent/arch_config.yaml index 0175e1c0..16c8951e 100644 --- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml +++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml @@ -8,10 +8,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/demos/samples_python/stock_quote/arch_config.yaml b/demos/samples_python/stock_quote/arch_config.yaml index 6b4a427f..fcff2c0b 100644 --- a/demos/samples_python/stock_quote/arch_config.yaml +++ b/demos/samples_python/stock_quote/arch_config.yaml @@ -8,10 +8,8 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o endpoints: twelvedata_api: diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index a51bf6a1..afc0ef04 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -17,15 +17,11 @@ overrides: prompt_target_intent_matching_threshold: 0.6 llm_providers: - - name: groq - access_key: $GROQ_API_KEY - provider_interface: groq - model: llama-3.2-3b-preview + - access_key: $GROQ_API_KEY + model: groq/llama-3.2-3b-preview - - name: gpt-4o - access_key: $OPENAI_API_KEY - 
provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true system_prompt: | diff --git a/demos/use_cases/README.md b/demos/use_cases/README.md index f0684e81..30cce822 100644 --- a/demos/use_cases/README.md +++ b/demos/use_cases/README.md @@ -13,16 +13,12 @@ listeners: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider: openai - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest ``` ### Step 2. Start arch gateway diff --git a/demos/use_cases/llm_routing/README.md b/demos/use_cases/llm_routing/README.md index 0d18d0bd..45c7c5a5 100644 --- a/demos/use_cases/llm_routing/README.md +++ b/demos/use_cases/llm_routing/README.md @@ -18,7 +18,7 @@ You can also pass in a header to override model when sending prompt. Following e ```bash $ curl --header 'Content-Type: application/json' \ - --header 'x-arch-llm-provider-hint: ministral-3b' \ + --header 'x-arch-llm-provider-hint: mistral/ministral-3b' \ --data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \ http://localhost:12000/v1/chat/completions 2> /dev/null | jq . { diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 43bdd495..cb3a42e6 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -9,46 +9,34 @@ listeners: llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider_interface: mistral - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest - - name: claude-sonnet - access_key: $ANTHROPIC_API_KEY - provider_interface: claude - model: claude-3-7-sonnet-latest + - access_key: $ANTHROPIC_API_KEY + model: claude/claude-3-7-sonnet-latest - - name: claude-sonnet-4 - access_key: $ANTHROPIC_API_KEY - provider_interface: claude - model: claude-sonnet-4-0 + - access_key: $ANTHROPIC_API_KEY + model: claude/claude-sonnet-4-0 - - name: deepseek - access_key: $DEEPSEEK_API_KEY - provider_interface: deepseek - model: deepseek-reasoner + - access_key: $DEEPSEEK_API_KEY + model: deepseek/deepseek-reasoner - - name: groq - access_key: $GROQ_API_KEY - provider_interface: groq - model: llama-3.1-8b-instant + - access_key: $GROQ_API_KEY + model: groq/llama-3.1-8b-instant - - name: gemini - access_key: $GEMINI_API_KEY - provider_interface: gemini - model: gemini-1.5-pro-latest + - access_key: $GEMINI_API_KEY + model: gemini/gemini-1.5-pro-latest + + - model: custom/test-model + base_url: http://host.docker.internal:11223 + provider_interface: openai tracing: random_sampling: 100 diff --git a/demos/use_cases/ollama/arch_config.yaml b/demos/use_cases/ollama/arch_config.yaml index 24eb1bf1..5f88b0a3 100644 --- a/demos/use_cases/ollama/arch_config.yaml +++ b/demos/use_cases/ollama/arch_config.yaml @@ -9,10 +9,9 @@ listeners: llm_providers: - - name: local-llama + - model: my_llm_provider/llama3.2 provider_interface: openai - model: llama3.2 - endpoint: 
host.docker.internal:11434 + base_url: http://host.docker.internal:11434 default: true system_prompt: | diff --git a/demos/use_cases/orchestrating_agents/arch_config.yaml b/demos/use_cases/orchestrating_agents/arch_config.yaml index 639c5242..b52ceb22 100644 --- a/demos/use_cases/orchestrating_agents/arch_config.yaml +++ b/demos/use_cases/orchestrating_agents/arch_config.yaml @@ -22,10 +22,8 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true system_prompt: | diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index f8521811..33136325 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -9,28 +9,21 @@ listeners: llm_providers: - - name: gpt-4o-mini - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - - - name: gpt-4.1 - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 default: true - - name: code_generation + - model: openai/gpt-4o access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - model: gpt-4o-mini - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements tracing: random_sampling: 100 diff --git a/demos/use_cases/preference_based_routing/arch_config_local.yaml b/demos/use_cases/preference_based_routing/arch_config_local.yaml index 029918d0..b965cd0c 100644 --- a/demos/use_cases/preference_based_routing/arch_config_local.yaml +++ b/demos/use_cases/preference_based_routing/arch_config_local.yaml @@ -14,32 +14,24 @@ listeners: llm_providers: - name: arch-router - provider_interface: arch - model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M - endpoint: host.docker.internal:11434 + model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + base_url: http://host.docker.internal:11434 - - name: gpt-4o-mini - provider_interface: openai + - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY - model: gpt-4o-mini - - - name: gpt-4.1 - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 default: true - - name: code_generation + - model: openai/gpt-4o access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4.1 - usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries - - name: code_understanding - provider_interface: openai + - model: openai/gpt-4.1 access_key: $OPENAI_API_KEY - model: gpt-4.1 - usage: understand and explain existing code snippets, functions, or libraries + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or 
requirements tracing: random_sampling: 100 diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl index 432f0996..d9b243e7 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl @@ -2,13 +2,13 @@ POST http://localhost:12000/v1/chat/completions Content-Type: application/json { + "model": "openai/gpt-4.1", "messages": [ { "role": "user", "content": "hi" } - ], - "model": "none" + ] } HTTP 200 [Asserts] diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl index 8b92bcc7..16c68c72 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl @@ -14,4 +14,4 @@ Content-Type: application/json HTTP 200 [Asserts] header "content-type" matches /text\/event-stream/ -body matches /^data: .*?gpt-4.1.*?\n/ +body matches /^data: .*?gpt-4o-mini.*?\n/ diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml index 99a67401..b848d718 100644 --- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml +++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml @@ -85,10 +85,8 @@ system_prompt: | Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks! llm_providers: - - name: openai - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true prompt_targets: diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index 4523ae32..69b24f41 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 9c3499ea..34dbf64c 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -50,10 +50,8 @@ Create ``arch_config.yaml`` file with the following content: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. @@ -153,16 +151,12 @@ Create ``arch_config.yaml`` file with the following content: timeout: 30s llm_providers: - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true - - name: ministral-3b - access_key: $MISTRAL_API_KEY - provider_interface: openai - model: ministral-3b-latest + - access_key: $MISTRAL_API_KEY + model: mistral/ministral-3b-latest Step 2.
Start arch gateway ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 4ee46cbb..89501f88 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -9,10 +9,8 @@ listeners: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o default: true # default system prompt used by all prompt targets diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 0cde6ff8..f999860c 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -74,9 +74,6 @@ Below is an example to show how to set up a prompt target for the Arch Router: :caption: Route Config Example - routing: - model: archgw-v1-router-model - listeners: egress_traffic: address: 0.0.0.0 @@ -85,29 +82,22 @@ Below is an example to show how to set up a prompt target for the Arch Router: timeout: 30s llm_providers: - - name: archgw-v1-router-model - provider_interface: openai - model: katanemo/Arch-Router-1.5B - base_url: ... - - name: gpt-4o-mini - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o-mini - default: true + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + default: true - - name: code_generation - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4o - usage: Generating new code snippets, functions, or boilerplate based on user prompts or requirements - - - name: code_understanding - provider_interface: openai - access_key: $OPENAI_API_KEY - model: gpt-4.1 - usage: understand and explain existing code snippets, functions, or libraries + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code understanding + description: understand and explain existing code snippets, functions, or libraries + - model: openai/gpt-4.1 + access_key: $OPENAI_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements Example Use Cases ------------------------- diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index 5ef2639c..808baff1 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -30,21 +30,16 @@ endpoints: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - - name: OpenAI - provider_interface: openai + - name: openai/gpt-4o access_key: $OPENAI_API_KEY - model: gpt-4o + model: openai/gpt-4o default: true - - name: Mistral8x7b - provider_interface: openai - access_key: $MISTRAL_API_KEY - model: mistral-8x7b + - access_key: $MISTRAL_API_KEY + model: mistral/mistral-8x7b - - name: MistralLocal7b - provider_interface: openai - model: mistral-7b-instruct - endpoint: mistral_local + - model: mistral/mistral-7b-instruct + base_url: http://mistral_local # provides a way to override default settings for the arch system overrides: diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml new file mode 100644 index 
00000000..503f6a80 --- /dev/null +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -0,0 +1,81 @@ +endpoints: + app_server: + connect_timeout: 0.005s + endpoint: 127.0.0.1 + port: 80 + error_target: + endpoint: error_target_1 + port: 80 + mistral_local: + endpoint: 127.0.0.1 + port: 8001 +listeners: + egress_traffic: + address: 0.0.0.0 + message_format: openai + port: 12000 + timeout: 5s + ingress_traffic: + address: 0.0.0.0 + message_format: openai + port: 10000 + timeout: 5s +llm_providers: +- access_key: $OPENAI_API_KEY + default: true + model: gpt-4o + name: openai/gpt-4o + provider_interface: openai +- access_key: $MISTRAL_API_KEY + model: mistral-8x7b + name: mistral/mistral-8x7b + provider_interface: mistral +- base_url: http://mistral_local + endpoint: mistral_local + model: mistral-7b-instruct + name: mistral/mistral-7b-instruct + port: 80 + protocol: http + provider_interface: mistral +overrides: + prompt_target_intent_matching_threshold: 0.6 +prompt_guards: + input_guards: + jailbreak: + on_exception: + message: Looks like you're curious about my abilities, but I can only provide + assistance within my programmed parameters. +prompt_targets: +- auto_llm_dispatch_on_response: true + default: true + description: handle all scenarios that are question and answer in nature. Like summarization, + information extraction, etc. + endpoint: + http_method: POST + name: app_server + path: /agent/summary + name: information_extraction + system_prompt: You are a helpful information extraction assistant. Use the information + that is provided to you. +- description: Reboot a specific network device + endpoint: + name: app_server + path: /agent/action + name: reboot_network_device + parameters: + - description: Identifier of the network device to reboot. + name: device_id + required: true + type: str + - default: false + description: Confirmation flag to proceed with reboot. + enum: + - true + - false + name: confirmation + type: bool +system_prompt: You are a network assistant that just offers facts; not advice on manufacturers + or purchasing decisions. +tracing: + sampling_rate: 0.1 +version: v0.1 diff --git a/model_server/.vscode/launch.json b/model_server/.vscode/launch.json index ca83be87..19ed7342 100644 --- a/model_server/.vscode/launch.json +++ b/model_server/.vscode/launch.json @@ -4,6 +4,7 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { "name": "model server", "type": "debugpy", diff --git a/model_server/.vscode/settings.json b/model_server/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/model_server/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/archgw/.vscode/launch.json b/tests/archgw/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/archgw/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/archgw/.vscode/settings.json b/tests/archgw/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/archgw/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index c702887b..70a95a3e 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -13,21 +13,15 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o-mini + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini default: true - - name: gpt-3.5-turbo-0125 - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-3.5-turbo-0125 + - access_key: $OPENAI_API_KEY + model: openai/gpt-3.5-turbo-0125 - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider_interface: openai - model: gpt-4o + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o system_prompt: | You are a helpful assistant. diff --git a/tests/e2e/.vscode/launch.json b/tests/e2e/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/e2e/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/e2e/.vscode/settings.json b/tests/e2e/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/e2e/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/tests/modelserver/.vscode/launch.json b/tests/modelserver/.vscode/launch.json new file mode 100644 index 00000000..6a211d8e --- /dev/null +++ b/tests/modelserver/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/tests/modelserver/.vscode/settings.json b/tests/modelserver/.vscode/settings.json new file mode 100644 index 00000000..98ba633e --- /dev/null +++ b/tests/modelserver/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +}
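One pattern in the hunks above is worth calling out: self-hosted, OpenAI-compatible endpoints (the ollama and llm_routing configs) are the one place where explicit fields survive the migration, because their provider prefix is a free-form label rather than a built-in provider name. A sketch combining the two styles shown in this diff (host and port values are the demo values, not recommendations):

```yaml
llm_providers:
  # Hosted provider: the prefix selects a built-in interface; no base_url needed.
  - access_key: $OPENAI_API_KEY
    model: openai/gpt-4o-mini
    default: true

  # Self-hosted model: the prefix is a free-form label, so the API dialect
  # and endpoint must be spelled out explicitly.
  - model: my_llm_provider/llama3.2
    provider_interface: openai
    base_url: http://host.docker.internal:11434
```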