mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix tests
This commit is contained in:
parent
bc329a4421
commit
57ffaf7431
15 changed files with 94 additions and 94 deletions
|
|
@ -124,7 +124,10 @@ properties:
|
|||
required:
|
||||
type: boolean
|
||||
default:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: boolean
|
||||
description:
|
||||
type: string
|
||||
type:
|
||||
|
|
@ -132,7 +135,10 @@ properties:
|
|||
enum:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: boolean
|
||||
in_path:
|
||||
type: boolean
|
||||
format:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
rag_energy_source_agent:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
weather_forecast_service:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
overrides:
|
||||
optimize_context_window: true
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/plain Content-Type in the HTTP request
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||
llm_providers:
|
||||
|
|
@ -51,11 +52,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
app_server:
|
||||
|
|
|
|||
|
|
@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
|
||||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
|
|
@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
|
||||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
default: true
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: mistral
|
||||
provider_interface: openai
|
||||
model: ministral-3b-latest
|
||||
|
||||
Step 2. Start arch gateway
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/plain Content-Type in the HTTP request
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||
llm_providers:
|
||||
|
|
@ -53,11 +54,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
app_server:
|
||||
|
|
|
|||
|
|
@ -33,14 +33,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
rate_limits:
|
||||
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
|
||||
http_header:
|
||||
name: Authorization
|
||||
value: "" # Empty value means each separate value has a separate limit
|
||||
limit:
|
||||
tokens: 100000 # Tokens per unit
|
||||
unit: minute
|
||||
|
||||
- name: Mistral8x7b
|
||||
provider_interface: openai
|
||||
|
|
@ -96,11 +88,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
tracing:
|
||||
# sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
|
||||
sampling_rate: 0.1
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
weather_forecast_service:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue