fix more

2026-06-17 15:25:17 +02:00 · 2025-02-12 14:48:23 -08:00 · 2025-02-12 14:48:23 -08:00 · 9cb04756c5
commit 9cb04756c5
parent d2ad943f63
13 changed files with 181 additions and 90 deletions
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -3,21 +3,38 @@ type: object
 properties:
  version:
    type: string
-  listener:
+  listeners:
    type: object
    properties:
-      address:
-        type: string
-      port:
-        type: integer
-      message_format:
-        type: string
-      connect_timeout:
-        type: string
-    additionalProperties: false
-    required:
-      - address
-      - port
+      prompt_gateway:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+          timeout:
+            type: string
+        additionalProperties: false
+      llm_gateway:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+              - huggingface
+          timeout:
+            type: string
+        additionalProperties: false
  endpoints:
    type: object
    patternProperties:
@ -224,5 +241,5 @@ properties:
 additionalProperties: false
 required:
  - version
-  - listener
+  - listeners
  - llm_providers
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -32,8 +32,8 @@ static_resources:
    - name: arch_listener_http
      address:
        socket_address:
-          address: 0.0.0.0
-          port_value: 10000
+          address: {{ prompt_gateway_listener.address }}
+          port_value: {{ prompt_gateway_listener.port }}
      traffic_direction: INBOUND
      filter_chains:
        - filters:
@ -76,7 +76,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: arch_prompt_gateway_listener
-                            timeout: 60s
+                            timeout: {{ prompt_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
@ -273,12 +273,11 @@ static_resources:
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-
    - name: arch_listener_http_llm
      address:
        socket_address:
-          address: 0.0.0.0
-          port_value: 12000
+          address: {{ llm_gateway_listener.address }}
+          port_value: {{ llm_gateway_listener.port }}
      traffic_direction: INBOUND
      filter_chains:
        - filters:
@ -321,13 +320,12 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: arch_listener_llm
-                            timeout: 60s
+                            timeout: {{ llm_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-
    - name: arch_listener_llm
      address:
        socket_address:
@ -443,7 +441,7 @@ static_resources:

  clusters:
    - name: openai
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -467,7 +465,7 @@ static_resources:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
    - name: mistral
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -488,7 +486,7 @@ static_resources:
          sni: api.mistral.ai
    {% for internal_cluster in ["arch_fc", "model_server"] %}
    - name: {{ internal_cluster }}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -504,7 +502,7 @@ static_resources:
                  hostname: {{ internal_cluster }}
    {% endfor %}
    - name: mistral_7b_instruct
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -523,7 +521,7 @@ static_resources:
      {% if cluster.connect_timeout -%}
      connect_timeout: {{ cluster.connect_timeout }}
      {% else -%}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      {% endif -%}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
@ -557,7 +555,7 @@ static_resources:

 {% for local_llm_provider in local_llms %}
    - name: {{ local_llm_provider.name }}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -589,7 +587,7 @@ static_resources:

 {% endfor %}
    - name: arch_internal
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -605,7 +603,7 @@ static_resources:
                  hostname: arch_internal

    - name: arch_prompt_gateway_listener
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -621,7 +619,7 @@ static_resources:
                  hostname: arch_prompt_gateway_listener

    - name: arch_listener_llm
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -104,7 +104,25 @@ def validate_and_render_schema():
    arch_config_string = yaml.dump(config_yaml)
    arch_llm_config_string = yaml.dump(config_yaml)

+    prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
+    if prompt_gateway_listener.get("port") is None:  # key missing or explicitly null
+        prompt_gateway_listener["port"] = 10000  # default port for prompt gateway
+    if prompt_gateway_listener.get("address") is None:
+        prompt_gateway_listener["address"] = "127.0.0.1"  # default bind address
+    if prompt_gateway_listener.get("timeout") is None:
+        prompt_gateway_listener["timeout"] = "10s"  # default route timeout
+
+    llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
+    if llm_gateway_listener.get("port") is None:  # key missing or explicitly null
+        llm_gateway_listener["port"] = 12000  # default port for llm gateway
+    if llm_gateway_listener.get("address") is None:
+        llm_gateway_listener["address"] = "127.0.0.1"  # default bind address
+    if llm_gateway_listener.get("timeout") is None:
+        llm_gateway_listener["timeout"] = "10s"  # default route timeout
+
    data = {
+        "prompt_gateway_listener": prompt_gateway_listener,
+        "llm_gateway_listener": llm_gateway_listener,
        "arch_config": arch_config_string,
        "arch_llm_config": arch_llm_config_string,
        "arch_clusters": inferred_clusters,
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -15,12 +15,15 @@ from cli.consts import (
 )
 from huggingface_hub import snapshot_download
 from dotenv import dotenv_values
+import yaml


 log = getLogger(__name__)


-def start_archgw_docker(client, arch_config_file, env):
+def start_archgw_docker(
+    client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
+):
    logs_path = "~/archgw_logs"
    logs_path_abs = os.path.expanduser(logs_path)

@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env):
        image=ARCHGW_DOCKER_IMAGE,
        detach=True,  # Run in detached mode
        ports={
-            "10000/tcp": 10000,
+            f"{prompt_gateway_port}/tcp": prompt_gateway_port,
            "10001/tcp": 10001,
            "11000/tcp": 11000,
-            "12000/tcp": 12000,
+            f"{llm_gateway_port}/tcp": llm_gateway_port,
            "9901/tcp": 19901,
        },
        volumes={
@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env):
        },
        extra_hosts={"host.docker.internal": "host-gateway"},
        healthcheck={
-            "test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
+            "test": [
+                "CMD",
+                "curl",
+                "-f",
+                f"http://localhost:{prompt_gateway_port}/healthz",
+            ],
            "interval": 5000000000,  # 5 seconds
            "timeout": 1000000000,  # 1 seconds
            "retries": 3,
@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
        except docker.errors.NotFound as e:
            pass

-        container = start_archgw_docker(client, arch_config_file, env)
+        # load the arch config yaml to find the configured gateway listener ports
+        arch_config_dict = {}
+        with open(arch_config_file) as f:
+            arch_config_dict = yaml.safe_load(f) or {}  # None for an empty file
+
+        prompt_gateway_port = (
+            arch_config_dict.get("listeners", {})
+            .get("prompt_gateway", {})
+            .get("port", 10000)  # default when the port is not configured
+        )
+        llm_gateway_port = (
+            arch_config_dict.get("listeners", {})
+            .get("llm_gateway", {})
+            .get("port", 12000)  # default when the port is not configured
+        )
+
+        container = start_archgw_docker(
+            client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
+        )

        start_time = time.time()

--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@ -368,6 +368,68 @@ files = [
    {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
 ]

+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
+    {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
+    {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
+    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
+]
+
 [[package]]
 name = "referencing"
 version = "0.36.2"
@ -568,4 +630,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb"
+content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@ -17,6 +17,7 @@ jsonschema = "^4.23.0"
 setuptools = "75.5.0"
 docker = "^7.1.0"
 python-dotenv = "^1.0.1"
+pyyaml = "^6.0.2"

 [tool.poetry.scripts]
 archgw = "cli.main:main"
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -9,7 +9,6 @@ use crate::api::open_ai::{
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Configuration {
    pub version: String,
-    pub listener: Listener,
    pub endpoints: Option<HashMap<String, Endpoint>>,
    pub llm_providers: Vec<LlmProvider>,
    pub overrides: Option<Overrides>,
@ -48,32 +47,6 @@ pub struct ErrorTargetDetail {
    pub endpoint: Option<EndpointDetails>,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Listener {
-    pub address: String,
-    pub port: u16,
-    pub message_format: MessageFormat,
-    // pub connect_timeout: Option<DurationString>,
-}
-
-impl Default for Listener {
-    fn default() -> Self {
-        Listener {
-            address: "".to_string(),
-            port: 0,
-            message_format: MessageFormat::default(),
-            // connect_timeout: None,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub enum MessageFormat {
-    #[serde(rename = "huggingface")]
-    #[default]
-    Huggingface,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct PromptGuards {
    pub input_guards: HashMap<GuardType, GuardOptions>,
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
 pub const USER_ROLE: &str = "user";
 pub const TOOL_ROLE: &str = "tool";
 pub const ASSISTANT_ROLE: &str = "assistant";
-pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
+pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
--- a/demos/acm_k8s/arch_config_openshift.yaml
+++ b/demos/acm_k8s/arch_config_openshift.yaml
@ -1,23 +1,24 @@
 version: "0.1-beta"

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    message_format: openai
+    timeout: 30s

 overrides:
  # confidence threshold for prompt target intent matching
  prompt_target_intent_matching_threshold: 0.6
+  optimize_context_window: true

 endpoints:
  acm_service:
    endpoint: host.docker.internal:8001
-    connect_timeout: 0.005s
+    connect_timeout: 0.25s
    http_host: localhost
  local_proxy_service:
    endpoint: host.docker.internal:8002
-    connect_timeout: 0.005s
+    connect_timeout: 0.25s
    http_host: localhost

 llm_providers:
--- a/demos/shared/jaeger/Dockerfile
+++ b/demos/shared/jaeger/Dockerfile
@ -1,4 +1,4 @@
-FROM jaegertracing/all-in-one:1.62.0
+FROM jaegertracing/jaeger:2.3.0
 HEALTHCHECK \
    --interval=1s \
    --timeout=1s \
--- a/docs/source/concepts/includes/arch_config.yaml
+++ b/docs/source/concepts/includes/arch_config.yaml
@ -13,7 +13,6 @@ llm_providers:
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
-    stream: true

 # default system prompt used by all prompt targets
 system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
--- a/docs/source/guides/includes/arch_config.yaml
+++ b/docs/source/guides/includes/arch_config.yaml
@ -13,7 +13,6 @@ llm_providers:
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
-    stream: true

 # default system prompt used by all prompt targets
 system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@ -1,16 +1,14 @@
 version: v0.1

-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
-  common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
-    tls_certificates:
-      - certificate_chain:
-          filename: /etc/certs/cert.pem
-        private_key:
-          filename: /etc/certs/key.pem
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 5s
+  llm_gateway:
+    address: 0.0.0.0
+    port: 12000

 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
@ -35,7 +33,6 @@ llm_providers:
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
-    stream: true
    rate_limits:
      selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
        http_header: