fix tests

2026-06-17 15:25:17 +02:00 · 2025-02-12 17:44:51 -08:00 · 2025-02-12 17:44:51 -08:00 · 57ffaf7431
commit 57ffaf7431
parent bc329a4421
15 changed files with 94 additions and 94 deletions
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -124,7 +124,10 @@ properties:
              required:
                type: boolean
              default:
-                type: string
+                anyOf:
+                  - type: string
+                  - type: integer
+                  - type: boolean
              description:
                type: string
              type:
@@ -132,7 +135,10 @@ properties:
              enum:
                type: array
                items:
-                  type: string
+                  anyOf:
+                    - type: string
+                    - type: integer
+                    - type: boolean
              in_path:
                type: boolean
              format:
--- a/demos/samples_java/weather_forcecast_service/arch_config.yaml
+++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
--- a/demos/samples_python/human_resources_agent/arch_config.yaml
+++ b/demos/samples_python/human_resources_agent/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
--- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
+++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 127.0.0.1
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 endpoints:
  rag_energy_source_agent:
--- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml
+++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
--- a/demos/samples_python/stock_quote/arch_config.yaml
+++ b/demos/samples_python/stock_quote/arch_config.yaml
@@ -1,11 +1,11 @@
 version: v0.1

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
-
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
 llm_providers:
  - name: gpt-4o
    access_key: $OPENAI_API_KEY
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 endpoints:
  weather_forecast_service:
--- a/demos/use_cases/llm_routing/arch_config.yaml
+++ b/demos/use_cases/llm_routing/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 llm_providers:
  - name: gpt-4o-mini
--- a/demos/use_cases/ollama/arch_config.yaml
+++ b/demos/use_cases/ollama/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 llm_providers:

--- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml
+++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
@@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 overrides:
  optimize_context_window: true
--- a/docs/source/concepts/includes/arch_config.yaml
+++ b/docs/source/concepts/includes/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
@@ -51,11 +52,6 @@ prompt_targets:
        default: false
        enum: [true, false]

-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
  app_server:
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:

   version: v0.1

-   listener:
-     address: 0.0.0.0
-     port: 10000
-     message_format: huggingface
-     connect_timeout: 0.005s
+  listeners:
+    prompt_gateway:
+      address: 0.0.0.0
+      port: 10000
+      message_format: openai
+      timeout: 30s

   llm_providers:
     - name: gpt-4o
@@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:

   version: v0.1

-   listener:
-     address: 0.0.0.0
-     port: 10000
-     message_format: huggingface
-     connect_timeout: 0.005s
+  listeners:
+    prompt_gateway:
+      address: 0.0.0.0
+      port: 10000
+      message_format: openai
+      timeout: 30s

   llm_providers:
     - name: gpt-4o
       access_key: $OPENAI_API_KEY
-       provider: openai
+       provider_interface: openai
       model: gpt-4o
       default: true

     - name: ministral-3b
       access_key: $MISTRAL_API_KEY
-       provider: mistral
+       provider_interface: openai
       model: ministral-3b-latest

 Step 2. Start arch gateway
--- a/docs/source/guides/includes/arch_config.yaml
+++ b/docs/source/guides/includes/arch_config.yaml
@@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 0.0.0.0 # or 127.0.0.1
-  port: 10000
-  # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
-  message_format: huggingface
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
@@ -53,11 +54,6 @@ prompt_targets:
        default: false
        enum: [true, false]

-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
 endpoints:
  app_server:
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@@ -33,14 +33,6 @@ llm_providers:
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
-    rate_limits:
-      selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
-        http_header:
-          name: Authorization
-          value: "" # Empty value means each separate value has a separate limit
-      limit:
-        tokens: 100000 # Tokens per unit
-        unit: minute

  - name: Mistral8x7b
    provider_interface: openai
@@ -96,11 +88,6 @@ prompt_targets:
        default: false
        enum: [true, false]

-error_target:
-  endpoint:
-    name: error_target_1
-    path: /error
-
 tracing:
  # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
  sampling_rate: 0.1
--- a/tests/archgw/arch_config.yaml
+++ b/tests/archgw/arch_config.yaml
@@ -1,10 +1,11 @@
 version: "0.1-beta"

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  prompt_gateway:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 endpoints:
  weather_forecast_service: