mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Update arch_config and add tests for arch config file (#407)
This commit is contained in:
parent
d0a783cca8
commit
e40b13be05
31 changed files with 379 additions and 212 deletions
|
|
@ -1,16 +1,16 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from an application/json or text/plain Content-Type in the HTTP request
|
||||
message_format: huggingface
|
||||
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
|
||||
tls_certificates:
|
||||
- certificate_chain:
|
||||
filename: /etc/certs/cert.pem
|
||||
private_key:
|
||||
filename: /etc/certs/key.pem
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
|
||||
# Arch performs round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
@ -35,15 +35,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
rate_limits:
|
||||
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
|
||||
http_header:
|
||||
name: Authorization
|
||||
value: "" # Empty value means each separate value has a separate limit
|
||||
limit:
|
||||
tokens: 100000 # Tokens per unit
|
||||
unit: minute
|
||||
|
||||
- name: Mistral8x7b
|
||||
provider_interface: openai
|
||||
|
|
@ -99,11 +90,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
tracing:
|
||||
# Sampling rate. Note: by default, Arch works with OpenTelemetry-compatible tracing.
|
||||
sampling_rate: 0.1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue