diff --git a/.github/workflows/validate_arch_config.yml b/.github/workflows/validate_arch_config.yml new file mode 100644 index 00000000..9503dad2 --- /dev/null +++ b/.github/workflows/validate_arch_config.yml @@ -0,0 +1,31 @@ +name: arch config tests + +on: + push: + branches: + - main + pull_request: + +jobs: + validate_arch_config: + runs-on: ubuntu-latest + defaults: + run: + working-directory: . + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.12" + + - name: build arch docker image + run: | + docker build -f arch/Dockerfile . -t katanemo/archgw + + - name: validate arch config + run: | + bash arch/validate_arch_config.sh diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 1b32b730..1432c0b9 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -3,21 +3,38 @@ type: object properties: version: type: string - listener: + listeners: type: object - properties: - address: - type: string - port: - type: integer - message_format: - type: string - connect_timeout: - type: string additionalProperties: false - required: - - address - - port + properties: + ingress_traffic: + type: object + properties: + address: + type: string + port: + type: integer + message_format: + type: string + enum: + - openai + timeout: + type: string + additionalProperties: false + egress_traffic: + type: object + properties: + address: + type: string + port: + type: integer + message_format: + type: string + enum: + - openai + timeout: + type: string + additionalProperties: false endpoints: type: object patternProperties: @@ -107,7 +124,10 @@ properties: required: type: boolean default: - type: string + anyOf: + - type: string + - type: integer + - type: boolean description: type: string type: @@ -115,7 +135,10 @@ properties: enum: type: array items: - type: string + anyOf: + - type: string + - type: integer + - type: boolean in_path: type: 
boolean format: @@ -224,5 +247,4 @@ properties: additionalProperties: false required: - version - - listener - llm_providers diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 0040b57b..ca722b7c 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -29,11 +29,11 @@ stats_config: - 180000 static_resources: listeners: - - name: arch_listener_http + - name: ingress_traffic address: socket_address: - address: 0.0.0.0 - port_value: 10000 + address: {{ prompt_gateway_listener.address }} + port_value: {{ prompt_gateway_listener.port }} traffic_direction: INBOUND filter_chains: - filters: @@ -55,7 +55,7 @@ static_resources: random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} - stat_prefix: arch_listener_http + stat_prefix: ingress_traffic codec_type: AUTO scheme_header_transformation: scheme_to_overwrite: https @@ -76,13 +76,13 @@ static_resources: route: auto_host_rewrite: true cluster: arch_prompt_gateway_listener - timeout: 60s + timeout: {{ prompt_gateway_listener.timeout }} http_filters: - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - name: arch_prompt_gateway_listener + - name: ingress_traffic_prompt address: socket_address: address: 0.0.0.0 @@ -104,11 +104,11 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector timeout: 0.250s - service_name: prompt_processor + service_name: ingress_traffic random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} - stat_prefix: arch_prompt_gateway_listener + stat_prefix: ingress_traffic codec_type: AUTO scheme_header_transformation: scheme_to_overwrite: https @@ -201,7 +201,7 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - name: arch_internal + - name: egress_api_traffic address: socket_address: address: 0.0.0.0 @@ -223,11 +223,11 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector 
timeout: 0.250s - service_name: prompt_processor + service_name: egress_api_traffic random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} - stat_prefix: arch_internal + stat_prefix: egress_api_traffic codec_type: AUTO scheme_header_transformation: scheme_to_overwrite: https @@ -273,13 +273,12 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - - name: arch_listener_http_llm + - name: egress_traffic address: socket_address: - address: 0.0.0.0 - port_value: 12000 - traffic_direction: INBOUND + address: {{ llm_gateway_listener.address }} + port_value: {{ llm_gateway_listener.port }} + traffic_direction: OUTBOUND filter_chains: - filters: - name: envoy.filters.network.http_connection_manager @@ -300,7 +299,7 @@ static_resources: random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} - stat_prefix: arch_listener_http + stat_prefix: egress_traffic codec_type: AUTO scheme_header_transformation: scheme_to_overwrite: https @@ -321,14 +320,13 @@ static_resources: route: auto_host_rewrite: true cluster: arch_listener_llm - timeout: 60s + timeout: {{ llm_gateway_listener.timeout }} http_filters: - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - - name: arch_listener_llm + - name: egress_traffic_llm address: socket_address: address: 0.0.0.0 @@ -349,11 +347,11 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector timeout: 0.250s - service_name: llm_gateway + service_name: egress_traffic_llm random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} - stat_prefix: arch_listener_http + stat_prefix: egress_traffic codec_type: AUTO scheme_header_transformation: scheme_to_overwrite: https @@ -443,7 +441,7 @@ static_resources: clusters: - name: openai - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -467,7 +465,7 @@ 
static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - name: mistral - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -488,7 +486,7 @@ static_resources: sni: api.mistral.ai {% for internal_cluster in ["arch_fc", "model_server"] %} - name: {{ internal_cluster }} - connect_timeout: 5s + connect_timeout: 0.5s type: STRICT_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -504,7 +502,7 @@ static_resources: hostname: {{ internal_cluster }} {% endfor %} - name: mistral_7b_instruct - connect_timeout: 5s + connect_timeout: 0.5s type: STRICT_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -523,7 +521,7 @@ static_resources: {% if cluster.connect_timeout -%} connect_timeout: {{ cluster.connect_timeout }} {% else -%} - connect_timeout: 5s + connect_timeout: 0.5s {% endif -%} type: LOGICAL_DNS dns_lookup_family: V4_ONLY @@ -557,7 +555,7 @@ static_resources: {% for local_llm_provider in local_llms %} - name: {{ local_llm_provider.name }} - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -589,7 +587,7 @@ static_resources: {% endfor %} - name: arch_internal - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -605,7 +603,7 @@ static_resources: hostname: arch_internal - name: arch_prompt_gateway_listener - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -621,7 +619,7 @@ static_resources: hostname: arch_prompt_gateway_listener - name: arch_listener_llm - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 447585fb..7392849e 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ 
-104,7 +104,27 @@ def validate_and_render_schema(): arch_config_string = yaml.dump(config_yaml) arch_llm_config_string = yaml.dump(config_yaml) + prompt_gateway_listener = config_yaml.get("listeners", {}).get( + "ingress_traffic", {} + ) + if prompt_gateway_listener.get("port") is None: + prompt_gateway_listener["port"] = 10000 # default port for prompt gateway + if prompt_gateway_listener.get("address") is None: + prompt_gateway_listener["address"] = "127.0.0.1" + if prompt_gateway_listener.get("timeout") is None: + prompt_gateway_listener["timeout"] = "10s" + + llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {}) + if llm_gateway_listener.get("port") is None: + llm_gateway_listener["port"] = 12000 # default port for llm gateway + if llm_gateway_listener.get("address") is None: + llm_gateway_listener["address"] = "127.0.0.1" + if llm_gateway_listener.get("timeout") is None: + llm_gateway_listener["timeout"] = "10s" + data = { + "prompt_gateway_listener": prompt_gateway_listener, + "llm_gateway_listener": llm_gateway_listener, "arch_config": arch_config_string, "arch_llm_config": arch_llm_config_string, "arch_clusters": inferred_clusters, diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index 78d9499c..291ca808 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -2,6 +2,8 @@ import subprocess import os import time import sys + +import yaml from cli.utils import getLogger from cli.consts import ( ARCHGW_DOCKER_NAME, @@ -22,6 +24,29 @@ from cli.docker_cli import ( log = getLogger(__name__) +def _get_gateway_ports(arch_config_file: str) -> tuple: + PROMPT_GATEWAY_DEFAULT_PORT = 10000 + LLM_GATEWAY_DEFAULT_PORT = 12000 + + # parse arch_config_file yaml file and get prompt_gateway_port + arch_config_dict = {} + with open(arch_config_file) as f: + arch_config_dict = yaml.safe_load(f) + + prompt_gateway_port = ( + arch_config_dict.get("listeners", {}) + .get("ingress_traffic", {}) + .get("port", 
PROMPT_GATEWAY_DEFAULT_PORT) + ) + llm_gateway_port = ( + arch_config_dict.get("listeners", {}) + .get("egress_traffic", {}) + .get("port", LLM_GATEWAY_DEFAULT_PORT) + ) + + return prompt_gateway_port, llm_gateway_port + + def start_arch(arch_config_file, env, log_timeout=120, foreground=False): """ Start Docker Compose in detached mode and stream logs until services are healthy. @@ -39,8 +64,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): docker_stop_container(ARCHGW_DOCKER_NAME) docker_remove_container(ARCHGW_DOCKER_NAME) + prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file) + return_code, _, archgw_stderr = docker_start_archgw_detached( - arch_config_file, os.path.expanduser("~/archgw_logs"), env + arch_config_file, + os.path.expanduser("~/archgw_logs"), + env, + prompt_gateway_port, + llm_gateway_port, ) if return_code != 0: log.info("Failed to start arch gateway: " + str(return_code)) @@ -50,7 +81,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): start_time = time.time() while True: health_check_status = health_check_endpoint( - "http://localhost:10000/healthz" + f"http://localhost:{prompt_gateway_port}/healthz" ) archgw_status = docker_container_status(ARCHGW_DOCKER_NAME) current_time = time.time() diff --git a/arch/tools/cli/docker_cli.py b/arch/tools/cli/docker_cli.py index 2ef9eb5f..edb8f764 100644 --- a/arch/tools/cli/docker_cli.py +++ b/arch/tools/cli/docker_cli.py @@ -1,7 +1,7 @@ import subprocess import json import sys -import requests # Add this import +import requests from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME from cli.utils import getLogger @@ -33,11 +33,19 @@ def docker_remove_container(container: str) -> str: def docker_start_archgw_detached( - arch_config_file: str, logs_path_abs: str, env: dict + arch_config_file: str, + logs_path_abs: str, + env: dict, + prompt_gateway_port, + llm_gateway_port, ) -> str: env_args = [item for key, value in 
env.items() for item in ["-e", f"{key}={value}"]] - port_mappings = ["10000:10000", "12000:12000", "9901:19901"] + port_mappings = [ + f"{prompt_gateway_port}:{prompt_gateway_port}", + f"{llm_gateway_port}:{llm_gateway_port}", + "9901:19901", + ] port_mappings_args = [item for port in port_mappings for item in ("-p", port)] volume_mappings = [ diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index e04c6db3..0ccc45d3 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -318,6 +318,28 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "docker" +version = "7.1.0" +description = "A Python library for the Docker Engine API." +optional = false +python-versions = ">=3.8" +files = [ + {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"}, + {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"}, +] + +[package.dependencies] +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" + +[package.extras] +dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"] +docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"] +ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -1602,6 +1624,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = 
"sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2025.1" @@ -1613,6 +1649,33 @@ files = [ {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, ] +[[package]] +name = "pywin32" +version = "308" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, + {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, + {file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"}, + {file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"}, + {file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"}, + {file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"}, + {file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"}, + {file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"}, + {file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"}, + {file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"}, + {file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"}, + {file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = 
"sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"}, + {file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"}, + {file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"}, + {file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"}, + {file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"}, + {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"}, + {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2481,4 +2544,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d4fb144073d4f8abcd8972892545d8ce47692d303ffa63dfe9b9bcdd0aea96f2" +content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index c32ea929..8ce28cbc 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -15,6 +15,9 @@ click = "^8.1.7" jinja2 = "^3.1.4" jsonschema = "^4.23.0" setuptools = "75.5.0" +docker = "^7.1.0" +python-dotenv = "^1.0.1" +pyyaml = "^6.0.2" [tool.poetry.scripts] archgw = "cli.main:main" diff --git a/arch/validate_arch_config.sh b/arch/validate_arch_config.sh new file mode 100644 index 00000000..a3822e90 --- /dev/null +++ b/arch/validate_arch_config.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +failed_files=() + +for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do + echo "Validating $file..." + if ! 
docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" > /dev/null 2>&1 ; then + echo "Validation failed for $file" + failed_files+=("$file") + fi +done + +# Print summary of failed files +if [ ${#failed_files[@]} -ne 0 ]; then + echo -e "\nValidation failed for the following files:" + printf '%s\n' "${failed_files[@]}" + exit 1 +else + echo -e "\nAll files validated successfully!" +fi diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 069695ba..1b954c3d 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -9,7 +9,6 @@ use crate::api::open_ai::{ #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Configuration { pub version: String, - pub listener: Listener, pub endpoints: Option>, pub llm_providers: Vec, pub overrides: Option, @@ -48,32 +47,6 @@ pub struct ErrorTargetDetail { pub endpoint: Option, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Listener { - pub address: String, - pub port: u16, - pub message_format: MessageFormat, - // pub connect_timeout: Option, -} - -impl Default for Listener { - fn default() -> Self { - Listener { - address: "".to_string(), - port: 0, - message_format: MessageFormat::default(), - // connect_timeout: None, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub enum MessageFormat { - #[serde(rename = "huggingface")] - #[default] - Huggingface, -} - #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct PromptGuards { - pub input_guards: HashMap, @@ -353,16 +326,6 @@ mod test { Some("/agent/summary".to_string()) ); - let error_target = config.error_target.as_ref().unwrap(); - assert_eq!( - error_target.endpoint.as_ref().unwrap().name, - "error_target_1".to_string() - ); - assert_eq!( - error_target.endpoint.as_ref().unwrap().path, - Some("/error".to_string()) - ); - let tracing = config.tracing.as_ref().unwrap(); 
assert_eq!(tracing.sampling_rate.unwrap(), 0.1); diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 561dbae3..cd52220e 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -3,7 +3,10 @@ pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; pub const TOOL_ROLE: &str = "tool"; pub const ASSISTANT_ROLE: &str = "assistant"; -pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes +pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds pub const MODEL_SERVER_NAME: &str = "model_server"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; diff --git a/crates/common/src/tracing.rs b/crates/common/src/tracing.rs index 85a95b0f..363a0870 100644 --- a/crates/common/src/tracing.rs +++ b/crates/common/src/tracing.rs @@ -166,7 +166,7 @@ impl TraceData { attributes: vec![Attribute { key: "service.name".to_string(), value: AttributeValue { - string_value: Some("upstream-llm".to_string()), + string_value: Some("egress_llm_traffic".to_string()), }, }], }; diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 528358a3..69496a61 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -381,7 +381,7 @@ impl HttpContext for StreamContext { Ok(traceparent) => { let mut trace_data = common::tracing::TraceData::new(); let mut llm_span = Span::new( - "upstream_llm_time".to_string(), + "egress_traffic".to_string(), Some(traceparent.trace_id), Some(traceparent.parent_id), self.request_body_sent_time.unwrap(), diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index 1ff7f91d..53a2d25b 100644 --- 
a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -6,7 +6,8 @@ use common::{ consts::{ ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH, - MODEL_SERVER_NAME, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE, + MODEL_SERVER_NAME, MODEL_SERVER_REQUEST_TIMEOUT_MS, REQUEST_ID_HEADER, TOOL_ROLE, + TRACE_PARENT_HEADER, USER_ROLE, }, errors::ServerError, http::{CallArgs, Client}, @@ -144,7 +145,10 @@ impl HttpContext for StreamContext { if metadata.is_none() { metadata = Some(HashMap::new()); } - metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string()); + metadata + .as_mut() + .unwrap() + .insert("optimize_context_window".to_string(), "true".to_string()); } } @@ -170,12 +174,15 @@ impl HttpContext for StreamContext { debug!("sending request to model server"); trace!("request body: {}", json_data); + let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string(); + let mut headers = vec![ (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), (":method", "POST"), (":path", "/function_calling"), ("content-type", "application/json"), (":authority", MODEL_SERVER_NAME), + ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()), ]; if self.request_id.is_some() { diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index e6db7f59..d197b3e0 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -6,9 +6,9 @@ use common::api::open_ai::{ }; use common::configuration::{Overrides, PromptTarget, Tracing}; use common::consts::{ - ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME, - ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, - TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE, + API_REQUEST_TIMEOUT_MS, ARCH_FC_MODEL_NAME, 
ARCH_INTERNAL_CLUSTER_NAME, + ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_TARGET_REQUEST_TIMEOUT_MS, MESSAGES_KEY, + REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE, }; use common::errors::ServerError; use common::http::{CallArgs, Client}; @@ -89,7 +89,7 @@ impl StreamContext { streaming_response: false, user_prompt: None, is_chat_completions_request: false, - overrides: overrides, + overrides, request_id: None, traceparent: None, _tracing: tracing, @@ -160,7 +160,7 @@ impl StreamContext { callout_context.request_body.messages.clone(), ); let arch_messages_json = serde_json::to_string(¶ms).unwrap(); - let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string(); + let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string(); let mut headers = vec![ (":method", "POST"), @@ -302,6 +302,8 @@ impl StreamContext { } }; + let timeout_str = API_REQUEST_TIMEOUT_MS.to_string(); + let http_method_str = http_method.to_string(); let mut headers: HashMap<_, _> = [ (ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()), @@ -310,6 +312,7 @@ impl StreamContext { (":authority", endpoint_details.name.as_str()), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()), ] .into_iter() .collect(); diff --git a/crates/prompt_gateway/tests/integration.rs b/crates/prompt_gateway/tests/integration.rs index 4af98166..0792a319 100644 --- a/crates/prompt_gateway/tests/integration.rs +++ b/crates/prompt_gateway/tests/integration.rs @@ -81,10 +81,11 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { (":path", "/function_calling"), ("content-type", "application/json"), (":authority", "model_server"), + ("x-envoy-upstream-rq-timeout-ms", "30000"), ]), None, None, - None, + Some(5000), ) .returning(Some(1)) .expect_log(Some(LogLevel::Trace), None) @@ -387,10 +388,11 @@ fn prompt_gateway_request_to_llm_gateway() { (":authority", "api_server"), 
("x-envoy-max-retries", "3"), (":path", "/weather"), + ("x-envoy-upstream-rq-timeout-ms", "30000"), ]), Some(expected_body), None, - None, + Some(5000), ) .returning(Some(2)) .expect_metric_increment("active_http_calls", 1) diff --git a/demos/samples_java/weather_forcecast_service/arch_config.yaml b/demos/samples_java/weather_forcecast_service/arch_config.yaml index 10c22819..0e3007e2 100644 --- a/demos/samples_java/weather_forcecast_service/arch_config.yaml +++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/currency_exchange/arch_config.yaml b/demos/samples_python/currency_exchange/arch_config.yaml index 89a9e65b..1475abca 100644 --- a/demos/samples_python/currency_exchange/arch_config.yaml +++ b/demos/samples_python/currency_exchange/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o diff --git a/demos/samples_python/human_resources_agent/arch_config.yaml b/demos/samples_python/human_resources_agent/arch_config.yaml index 09264821..5b1a9aa2 100644 --- a/demos/samples_python/human_resources_agent/arch_config.yaml +++ b/demos/samples_python/human_resources_agent/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + 
ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml index 1399965f..8e8feb4f 100644 --- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml +++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: rag_energy_source_agent: diff --git a/demos/samples_python/network_switch_operator_agent/arch_config.yaml b/demos/samples_python/network_switch_operator_agent/arch_config.yaml index ad3bfae5..40d529a2 100644 --- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml +++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/stock_quote/arch_config.yaml b/demos/samples_python/stock_quote/arch_config.yaml index c763d4ca..96901620 100644 --- a/demos/samples_python/stock_quote/arch_config.yaml +++ b/demos/samples_python/stock_quote/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 
30s llm_providers: - name: gpt-4o diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index 94a6bdfb..8b0f4ca0 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: weather_forecast_service: diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index f7ce78cd..e3238484 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o-mini diff --git a/demos/use_cases/ollama/arch_config.yaml b/demos/use_cases/ollama/arch_config.yaml index 5cb77750..394d3d6c 100644 --- a/demos/use_cases/ollama/arch_config.yaml +++ b/demos/use_cases/ollama/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml index a259a539..1d82a426 100644 --- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml +++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with 
tls_certificates - message_format: huggingface +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s overrides: optimize_context_window: true diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index c78f35f7..a7d0a289 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: @@ -13,7 +14,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true # default system prompt used by all prompt targets system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. @@ -52,11 +52,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. 
endpoints: app_server: diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 268bf45d..d73ef7ca 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content: version: v0.1 - listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s + listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o @@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content: version: v0.1 - listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s + listeners: + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o default: true - name: ministral-3b access_key: $MISTRAL_API_KEY - provider: mistral + provider_interface: openai model: ministral-3b-latest Step 2. 
Start arch gateway diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 415c74aa..e86c6072 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: @@ -13,7 +14,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true # default system prompt used by all prompt targets system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. @@ -54,11 +54,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. 
endpoints: app_server: diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index f21fc1f5..90bbef56 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -1,16 +1,16 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface - common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates - tls_certificates: - - certificate_chain: - filename: /etc/certs/cert.pem - private_key: - filename: /etc/certs/key.pem +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 5s + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 5s # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. endpoints: @@ -35,15 +35,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true - rate_limits: - selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys - http_header: - name: Authorization - value: "" # Empty value means each separate value has a separate limit - limit: - tokens: 100000 # Tokens per unit - unit: minute - name: Mistral8x7b provider_interface: openai @@ -99,11 +90,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - tracing: # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing. 
sampling_rate: 0.1 diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index 2c3d85d5..d1990330 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + ingress_traffic: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: weather_forecast_service: