diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index d9c9349e..b76efc15 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -124,7 +124,10 @@ properties: required: type: boolean default: - type: string + anyOf: + - type: string + - type: integer + - type: boolean description: type: string type: @@ -132,7 +135,10 @@ properties: enum: type: array items: - type: string + anyOf: + - type: string + - type: integer + - type: boolean in_path: type: boolean format: diff --git a/demos/samples_java/weather_forcecast_service/arch_config.yaml b/demos/samples_java/weather_forcecast_service/arch_config.yaml index 10c22819..8727a606 100644 --- a/demos/samples_java/weather_forcecast_service/arch_config.yaml +++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/human_resources_agent/arch_config.yaml b/demos/samples_python/human_resources_agent/arch_config.yaml index 09264821..29978db5 100644 --- a/demos/samples_python/human_resources_agent/arch_config.yaml +++ b/demos/samples_python/human_resources_agent/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml index 1399965f..4c940a6c 100644 --- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml +++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: rag_energy_source_agent: diff --git a/demos/samples_python/network_switch_operator_agent/arch_config.yaml b/demos/samples_python/network_switch_operator_agent/arch_config.yaml index ad3bfae5..9c29fbae 100644 --- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml +++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: diff --git a/demos/samples_python/stock_quote/arch_config.yaml b/demos/samples_python/stock_quote/arch_config.yaml index c763d4ca..7c38527b 100644 --- a/demos/samples_python/stock_quote/arch_config.yaml +++ b/demos/samples_python/stock_quote/arch_config.yaml @@ -1,11 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s - +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o access_key: $OPENAI_API_KEY diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index 94a6bdfb..a99f8002 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: weather_forecast_service: diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index f7ce78cd..250ab279 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o-mini diff --git a/demos/use_cases/ollama/arch_config.yaml b/demos/use_cases/ollama/arch_config.yaml index 5cb77750..1111b650 100644 --- a/demos/use_cases/ollama/arch_config.yaml +++ b/demos/use_cases/ollama/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml index a259a539..8dd13eb5 100644 --- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml +++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml @@ -1,8 +1,10 @@ version: v0.1 -listener: - address: 127.0.0.1 - port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s overrides: optimize_context_window: true diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index 93164401..dcf64c94 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: @@ -51,11 +52,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. endpoints: app_server: diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 268bf45d..22bd532a 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content: version: v0.1 - listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s + listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o @@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content: version: v0.1 - listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s + listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s llm_providers: - name: gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o default: true - name: ministral-3b access_key: $MISTRAL_API_KEY - provider: mistral + provider_interface: openai model: ministral-3b-latest Step 2. Start arch gateway diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 33c1748c..9899fb26 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -1,10 +1,11 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: @@ -53,11 +54,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. endpoints: app_server: diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index 2389389c..dca180e3 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -33,14 +33,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - rate_limits: - selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys - http_header: - name: Authorization - value: "" # Empty value means each separate value has a separate limit - limit: - tokens: 100000 # Tokens per unit - unit: minute - name: Mistral8x7b provider_interface: openai @@ -96,11 +88,6 @@ prompt_targets: default: false enum: [true, false] -error_target: - endpoint: - name: error_target_1 - path: /error - tracing: # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing. sampling_rate: 0.1 diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index 2c3d85d5..52fe0c30 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -1,10 +1,11 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 30s endpoints: weather_forecast_service: