From 9cb04756c5127dec074e5f93d8dae37515f35aef Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Wed, 12 Feb 2025 14:48:23 -0800
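Subject: [PATCH] Make prompt/LLM gateway listeners configurable

- Replace the single `listener` config block with `listeners.prompt_gateway`
  and `listeners.llm_gateway` (address, port, message_format, timeout).
- Render listener address, port and route timeout into the envoy template
  instead of hard-coding them.
- Have the CLI publish the configured listener ports and health-check the
  prompt gateway port.
- Lower the default cluster connect_timeout from 5s to 0.5s and the request
  timeout constants from 120s to 30s.
- Drop the unused `Listener` and `MessageFormat` types from the Rust
  configuration, update the jaeger demo image, and refresh demo/docs configs.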

---
 arch/arch_config_schema.yaml                  | 45 +++++++++----
 arch/envoy.template.yaml                      | 32 +++++-----
 arch/tools/cli/config_generator.py            | 18 ++++++
 arch/tools/cli/core.py                        | 36 +++++++++--
 arch/tools/poetry.lock                        | 64 ++++++++++++++++++-
 arch/tools/pyproject.toml                     |  1 +
 crates/common/src/configuration.rs            | 27 --------
 crates/common/src/consts.rs                   |  8 +--
 demos/acm_k8s/arch_config_openshift.yaml      | 15 +++--
 demos/shared/jaeger/Dockerfile                |  2 +-
 .../source/concepts/includes/arch_config.yaml |  1 -
 docs/source/guides/includes/arch_config.yaml  |  1 -
 .../includes/arch_config_full_reference.yaml  | 21 +++---
 13 files changed, 181 insertions(+), 90 deletions(-)

diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index 1b32b730..d9c9349e 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -3,21 +3,40 @@ type: object
 properties:
   version:
     type: string
-  listener:
+  listeners:
     type: object
     properties:
-      address:
-        type: string
-      port:
-        type: integer
-      message_format:
-        type: string
-      connect_timeout:
-        type: string
-    additionalProperties: false
-    required:
-      - address
-      - port
+      prompt_gateway:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+          timeout:
+            type: string
+        additionalProperties: false
+      llm_gateway:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+              - huggingface
+          timeout:
+            type: string
+        additionalProperties: false
+    # Reject unknown listener names (e.g. typos) instead of silently using defaults.
+    additionalProperties: false
   endpoints:
     type: object
     patternProperties:
@@ -224,5 +243,5 @@ properties:
 additionalProperties: false
 required:
   - version
-  - listener
+  - listeners
   - llm_providers
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index 0040b57b..313ece97 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -32,8 +32,8 @@ static_resources:
     - name: arch_listener_http
       address:
         socket_address:
-          address: 0.0.0.0
-          port_value: 10000
+          address: {{ prompt_gateway_listener.address }}
+          port_value: {{ prompt_gateway_listener.port }}
       traffic_direction: INBOUND
       filter_chains:
         - filters:
@@ -76,7 +76,7 @@ static_resources:
                           route:
                             auto_host_rewrite: true
                             cluster: arch_prompt_gateway_listener
-                            timeout: 60s
+                            timeout: {{ prompt_gateway_listener.timeout }}
                 http_filters:
                   - name: envoy.filters.http.router
                     typed_config:
@@ -273,12 +273,11 @@ static_resources:
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
 
-
     - name: arch_listener_http_llm
       address:
         socket_address:
-          address: 0.0.0.0
-          port_value: 12000
+          address: {{ llm_gateway_listener.address }}
+          port_value: {{ llm_gateway_listener.port }}
       traffic_direction: INBOUND
       filter_chains:
         - filters:
@@ -321,13 +320,12 @@ static_resources:
                           route:
                             auto_host_rewrite: true
                             cluster: arch_listener_llm
-                            timeout: 60s
+                            timeout: {{ llm_gateway_listener.timeout }}
                 http_filters:
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
 
-
     - name: arch_listener_llm
       address:
         socket_address:
@@ -443,7 +441,7 @@ static_resources:
 
   clusters:
     - name: openai
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -467,7 +465,7 @@ static_resources:
               tls_minimum_protocol_version: TLSv1_2
               tls_maximum_protocol_version: TLSv1_3
     - name: mistral
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -488,7 +486,7 @@ static_resources:
           sni: api.mistral.ai
     {% for internal_cluster in ["arch_fc", "model_server"] %}
     - name: {{ internal_cluster }}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: STRICT_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -504,7 +502,7 @@ static_resources:
                   hostname: {{ internal_cluster }}
     {% endfor %}
     - name: mistral_7b_instruct
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: STRICT_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -523,7 +521,7 @@ static_resources:
       {% if cluster.connect_timeout -%}
       connect_timeout: {{ cluster.connect_timeout }}
       {% else -%}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       {% endif -%}
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
@@ -557,7 +555,7 @@ static_resources:
 
 {% for local_llm_provider in local_llms %}
     - name: {{ local_llm_provider.name }}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -589,7 +587,7 @@ static_resources:
 
 {% endfor %}
     - name: arch_internal
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -605,7 +603,7 @@ static_resources:
                   hostname: arch_internal
 
     - name: arch_prompt_gateway_listener
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -621,7 +619,7 @@ static_resources:
                   hostname: arch_prompt_gateway_listener
 
     - name: arch_listener_llm
-      connect_timeout: 5s
+      connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
index 447585fb..33ea3c3b 100644
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@@ -104,7 +104,25 @@ def validate_and_render_schema():
     arch_config_string = yaml.dump(config_yaml)
     arch_llm_config_string = yaml.dump(config_yaml)
 
+    prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
+    if prompt_gateway_listener.get("port") is None:
+        prompt_gateway_listener["port"] = 10000  # default port for prompt gateway
+    if prompt_gateway_listener.get("address") is None:
+        prompt_gateway_listener["address"] = "127.0.0.1"  # default bind address
+    if prompt_gateway_listener.get("timeout") is None:
+        prompt_gateway_listener["timeout"] = "10s"  # default envoy route timeout
+
+    llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
+    if llm_gateway_listener.get("port") is None:
+        llm_gateway_listener["port"] = 12000  # default port for llm gateway
+    if llm_gateway_listener.get("address") is None:
+        llm_gateway_listener["address"] = "127.0.0.1"  # default bind address
+    if llm_gateway_listener.get("timeout") is None:
+        llm_gateway_listener["timeout"] = "10s"  # default envoy route timeout
+
     data = {
+        "prompt_gateway_listener": prompt_gateway_listener,
+        "llm_gateway_listener": llm_gateway_listener,
         "arch_config": arch_config_string,
         "arch_llm_config": arch_llm_config_string,
         "arch_clusters": inferred_clusters,
diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py
index c91120df..4ffbd873 100644
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@@ -15,12 +15,15 @@ from cli.consts import (
 )
 from huggingface_hub import snapshot_download
 from dotenv import dotenv_values
+import yaml
 
 
 log = getLogger(__name__)
 
 
-def start_archgw_docker(client, arch_config_file, env):
+def start_archgw_docker(
+    client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
+):
     logs_path = "~/archgw_logs"
     logs_path_abs = os.path.expanduser(logs_path)
 
@@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env):
         image=ARCHGW_DOCKER_IMAGE,
         detach=True,  # Run in detached mode
         ports={
-            "10000/tcp": 10000,
+            f"{prompt_gateway_port}/tcp": prompt_gateway_port,
             "10001/tcp": 10001,
             "11000/tcp": 11000,
-            "12000/tcp": 12000,
+            f"{llm_gateway_port}/tcp": llm_gateway_port,
             "9901/tcp": 19901,
         },
         volumes={
@@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env):
         },
         extra_hosts={"host.docker.internal": "host-gateway"},
         healthcheck={
-            "test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
+            "test": [
+                "CMD",
+                "curl",
+                "-f",
+                f"http://localhost:{prompt_gateway_port}/healthz",
+            ],
             "interval": 5000000000,  # 5 seconds
             "timeout": 1000000000,  # 1 seconds
             "retries": 3,
@@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
         except docker.errors.NotFound as e:
             pass
 
-        container = start_archgw_docker(client, arch_config_file, env)
+        # parse arch_config_file yaml file and get the prompt/llm gateway ports
+        arch_config_dict = {}
+        with open(arch_config_file) as f:
+            arch_config_dict = yaml.safe_load(f) or {}  # empty file loads as None
+
+        prompt_gateway_port = (
+            arch_config_dict.get("listeners", {})
+            .get("prompt_gateway", {})
+            .get("port", 10000)
+        )
+        llm_gateway_port = (
+            arch_config_dict.get("listeners", {})
+            .get("llm_gateway", {})
+            .get("port", 12000)
+        )
+
+        container = start_archgw_docker(
+            client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
+        )
 
         start_time = time.time()
 
diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock
index d5a45050..bcb89dae 100644
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@@ -368,6 +368,68 @@ files = [
     {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
 ]
 
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
+    {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
+    {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
+    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
+]
+
 [[package]]
 name = "referencing"
 version = "0.36.2"
@@ -568,4 +630,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb"
+content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"
diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml
index 7ed79a36..8ce28cbc 100644
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@@ -17,6 +17,7 @@ jsonschema = "^4.23.0"
 setuptools = "75.5.0"
 docker = "^7.1.0"
 python-dotenv = "^1.0.1"
+pyyaml = "^6.0.2"
 
 [tool.poetry.scripts]
 archgw = "cli.main:main"
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 069695ba..acd97fc3 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -9,7 +9,6 @@ use crate::api::open_ai::{
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Configuration {
     pub version: String,
-    pub listener: Listener,
     pub endpoints: Option<HashMap<String, Endpoint>>,
     pub llm_providers: Vec<LlmProvider>,
     pub overrides: Option<Overrides>,
@@ -48,32 +47,6 @@ pub struct ErrorTargetDetail {
     pub endpoint: Option<EndpointDetails>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Listener {
-    pub address: String,
-    pub port: u16,
-    pub message_format: MessageFormat,
-    // pub connect_timeout: Option<DurationString>,
-}
-
-impl Default for Listener {
-    fn default() -> Self {
-        Listener {
-            address: "".to_string(),
-            port: 0,
-            message_format: MessageFormat::default(),
-            // connect_timeout: None,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub enum MessageFormat {
-    #[serde(rename = "huggingface")]
-    #[default]
-    Huggingface,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct PromptGuards {
     pub input_guards: HashMap<GuardType, GuardOptions>,
diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs
index 630ed6d5..cd52220e 100644
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
 pub const USER_ROLE: &str = "user";
 pub const TOOL_ROLE: &str = "tool";
 pub const ASSISTANT_ROLE: &str = "assistant";
-pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
+pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
diff --git a/demos/acm_k8s/arch_config_openshift.yaml b/demos/acm_k8s/arch_config_openshift.yaml
index 53352db4..d151a95e 100644
--- a/demos/acm_k8s/arch_config_openshift.yaml
+++ b/demos/acm_k8s/arch_config_openshift.yaml
@@ -1,23 +1,24 @@
 version: "0.1-beta"
 
-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    message_format: openai
+    timeout: 30s
 
 overrides:
   # confidence threshold for prompt target intent matching
   prompt_target_intent_matching_threshold: 0.6
+  optimize_context_window: true
 
 endpoints:
   acm_service:
     endpoint: host.docker.internal:8001
-    connect_timeout: 0.005s
+    connect_timeout: 0.25s
     http_host: localhost
   local_proxy_service:
     endpoint: host.docker.internal:8002
-    connect_timeout: 0.005s
+    connect_timeout: 0.25s
     http_host: localhost
 
 llm_providers:
diff --git a/demos/shared/jaeger/Dockerfile b/demos/shared/jaeger/Dockerfile
index 64fd1404..ee50d82e 100644
--- a/demos/shared/jaeger/Dockerfile
+++ b/demos/shared/jaeger/Dockerfile
@@ -1,4 +1,4 @@
-FROM jaegertracing/all-in-one:1.62.0
+FROM jaegertracing/jaeger:2.3.0
 HEALTHCHECK \
     --interval=1s \
     --timeout=1s \
diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml
index c78f35f7..93164401 100644
--- a/docs/source/concepts/includes/arch_config.yaml
+++ b/docs/source/concepts/includes/arch_config.yaml
@@ -13,7 +13,6 @@ llm_providers:
     access_key: $OPENAI_API_KEY
     model: gpt-4o
     default: true
-    stream: true
 
 # default system prompt used by all prompt targets
 system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml
index 415c74aa..33c1748c 100644
--- a/docs/source/guides/includes/arch_config.yaml
+++ b/docs/source/guides/includes/arch_config.yaml
@@ -13,7 +13,6 @@ llm_providers:
     access_key: $OPENAI_API_KEY
     model: gpt-4o
     default: true
-    stream: true
 
 # default system prompt used by all prompt targets
 system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml
index f21fc1f5..2389389c 100644
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@@ -1,16 +1,14 @@
 version: v0.1
 
-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
-  common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
-    tls_certificates:
-      - certificate_chain:
-          filename: /etc/certs/cert.pem
-        private_key:
-          filename: /etc/certs/key.pem
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 5s
+  llm_gateway:
+    address: 0.0.0.0
+    port: 12000
 
 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
@@ -35,7 +33,6 @@ llm_providers:
     access_key: $OPENAI_API_KEY
     model: gpt-4o
     default: true
-    stream: true
     rate_limits:
       selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
         http_header: