diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index d9c9349e..b76efc15 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -124,7 +124,10 @@ properties:
               required:
                 type: boolean
               default:
-                type: string
+                anyOf:
+                  - type: string
+                  - type: integer
+                  - type: boolean
               description:
                 type: string
               type:
@@ -132,7 +135,10 @@ properties:
               enum:
                 type: array
                 items:
-                  type: string
+                  anyOf:
+                    - type: string
+                    - type: integer
+                    - type: boolean
               in_path:
                 type: boolean
               format:
diff --git a/demos/samples_java/weather_forcecast_service/arch_config.yaml b/demos/samples_java/weather_forcecast_service/arch_config.yaml
index 10c22819..8727a606 100644
--- a/demos/samples_java/weather_forcecast_service/arch_config.yaml
+++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
diff --git a/demos/samples_python/human_resources_agent/arch_config.yaml b/demos/samples_python/human_resources_agent/arch_config.yaml
index 09264821..29978db5 100644
--- a/demos/samples_python/human_resources_agent/arch_config.yaml
+++ b/demos/samples_python/human_resources_agent/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
diff --git a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
index 1399965f..4c940a6c 100644
--- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
+++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1
 
-listener:
-  address: 127.0.0.1
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 endpoints:
   rag_energy_source_agent:
diff --git a/demos/samples_python/network_switch_operator_agent/arch_config.yaml b/demos/samples_python/network_switch_operator_agent/arch_config.yaml
index ad3bfae5..9c29fbae 100644
--- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml
+++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
diff --git a/demos/samples_python/stock_quote/arch_config.yaml b/demos/samples_python/stock_quote/arch_config.yaml
index c763d4ca..7c38527b 100644
--- a/demos/samples_python/stock_quote/arch_config.yaml
+++ b/demos/samples_python/stock_quote/arch_config.yaml
@@ -1,11 +1,12 @@
 version: v0.1
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 llm_providers:
   - name: gpt-4o
     access_key: $OPENAI_API_KEY
diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml
index 94a6bdfb..a99f8002 100644
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 endpoints:
   weather_forecast_service:
diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml
index f7ce78cd..250ab279 100644
--- a/demos/use_cases/llm_routing/arch_config.yaml
+++ b/demos/use_cases/llm_routing/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 llm_providers:
   - name: gpt-4o-mini
diff --git a/demos/use_cases/ollama/arch_config.yaml b/demos/use_cases/ollama/arch_config.yaml
index 5cb77750..1111b650 100644
--- a/demos/use_cases/ollama/arch_config.yaml
+++ b/demos/use_cases/ollama/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 llm_providers:
 
diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
index a259a539..8dd13eb5 100644
--- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml
+++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 overrides:
   optimize_context_window: true
diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml
index 93164401..dcf64c94 100644
--- a/docs/source/concepts/includes/arch_config.yaml
+++ b/docs/source/concepts/includes/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1
 
-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
@@ -51,11 +52,6 @@ prompt_targets:
         default: false
         enum: [true, false]
 
-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
   app_server:
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 268bf45d..22bd532a 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
 
    version: v0.1
 
-   listener:
-     address: 0.0.0.0
-     port: 10000
-     message_format: huggingface
-     connect_timeout: 0.005s
+   listeners:
+     prompt_gateway:
+       address: 0.0.0.0
+       port: 10000
+       message_format: openai
+       timeout: 30s
 
    llm_providers:
      - name: gpt-4o
@@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
 
    version: v0.1
 
-   listener:
-     address: 0.0.0.0
-     port: 10000
-     message_format: huggingface
-     connect_timeout: 0.005s
+   listeners:
+     prompt_gateway:
+       address: 0.0.0.0
+       port: 10000
+       message_format: openai
+       timeout: 30s
 
    llm_providers:
      - name: gpt-4o
        access_key: $OPENAI_API_KEY
-       provider: openai
+       provider_interface: openai
        model: gpt-4o
        default: true
 
      - name: ministral-3b
        access_key: $MISTRAL_API_KEY
-       provider: mistral
+       provider_interface: openai
        model: ministral-3b-latest
 
 Step 2. Start arch gateway
diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml
index 33c1748c..9899fb26 100644
--- a/docs/source/guides/includes/arch_config.yaml
+++ b/docs/source/guides/includes/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1
 
-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
@@ -53,11 +54,6 @@ prompt_targets:
         default: false
         enum: [true, false]
 
-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
   app_server:
diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml
index 2389389c..dca180e3 100644
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@@ -33,14 +33,6 @@ llm_providers:
     access_key: $OPENAI_API_KEY
     model: gpt-4o
     default: true
-    rate_limits:
-      selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
-        http_header:
-          name: Authorization
-          value: "" # Empty value means each separate value has a separate limit
-      limit:
-        tokens: 100000 # Tokens per unit
-        unit: minute
 
   - name: Mistral8x7b
     provider_interface: openai
@@ -96,11 +88,6 @@ prompt_targets:
         default: false
         enum: [true, false]
 
-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 tracing:
   # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
   sampling_rate: 0.1
diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml
index 2c3d85d5..52fe0c30 100644
--- a/tests/archgw/arch_config.yaml
+++ b/tests/archgw/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 
 endpoints:
   weather_forecast_service: