Update arch_config and add tests for arch config file (#407)

This commit is contained in:
Adil Hafeez 2025-02-14 19:28:10 -08:00 committed by GitHub
parent d0a783cca8
commit e40b13be05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 379 additions and 212 deletions

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
llm_providers:
@ -13,7 +14,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
@ -52,11 +52,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider: mistral
provider_interface: openai
model: ministral-3b-latest
Step 2. Start arch gateway

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
llm_providers:
@ -13,7 +14,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
@ -54,11 +54,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -1,16 +1,16 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-type in the http request
message_format: huggingface
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
tls_certificates:
- certificate_chain:
filename: /etc/certs/cert.pem
private_key:
filename: /etc/certs/key.pem
listeners:
ingress_traffic:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 5s
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 5s
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
@ -35,15 +35,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
rate_limits:
selector: # optional; applies rate limiting based on HTTP headers such as JWT tokens or API keys
http_header:
name: Authorization
value: "" # Empty value means each separate value has a separate limit
limit:
tokens: 100000 # Tokens per unit
unit: minute
- name: Mistral8x7b
provider_interface: openai
@ -99,11 +90,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
tracing:
# sampling rate. Note that by default Arch emits OpenTelemetry-compatible traces.
sampling_rate: 0.1