mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix more
This commit is contained in:
parent
d2ad943f63
commit
9cb04756c5
13 changed files with 181 additions and 90 deletions
|
|
@ -1,16 +1,14 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content of an HTTP request whose Content-Type is application/json or text/plain
|
||||
message_format: huggingface
|
||||
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
|
||||
tls_certificates:
|
||||
- certificate_chain:
|
||||
filename: /etc/certs/cert.pem
|
||||
private_key:
|
||||
filename: /etc/certs/key.pem
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
llm_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
|
||||
# Arch performs round-robin load balancing across the different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
@ -35,7 +33,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
rate_limits:
|
||||
selector: # optional headers, used to apply rate limiting based on HTTP headers such as JWT tokens or API keys
|
||||
http_header:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue