Update arch_config and add tests for arch config file (#407)

This commit is contained in:
Adil Hafeez 2025-02-14 19:28:10 -08:00 committed by GitHub
parent d0a783cca8
commit e40b13be05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 379 additions and 212 deletions

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
llm_providers:
@ -13,7 +14,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
@ -52,11 +52,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider: mistral
provider_interface: openai
model: ministral-3b-latest
Step 2. Start arch gateway

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
llm_providers:
@ -13,7 +14,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
@ -54,11 +54,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -1,16 +1,16 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
tls_certificates:
- certificate_chain:
filename: /etc/certs/cert.pem
private_key:
filename: /etc/certs/key.pem
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 5s
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 5s
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
@ -35,15 +35,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
rate_limits:
selector: # optional; applies rate limiting based on HTTP headers such as JWT tokens or API keys
http_header:
name: Authorization
value: "" # Empty value means each separate value has a separate limit
limit:
tokens: 100000 # Tokens per unit
unit: minute
- name: Mistral8x7b
provider_interface: openai
@ -99,11 +90,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
tracing:
# sampling rate. Note that by default Arch emits OpenTelemetry-compatible traces.
sampling_rate: 0.1