fix tests

This commit is contained in:
Adil Hafeez 2025-02-12 17:44:51 -08:00
parent bc329a4421
commit 57ffaf7431
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
15 changed files with 94 additions and 94 deletions

View file

@ -124,7 +124,10 @@ properties:
required:
type: boolean
default:
type: string
anyOf:
- type: string
- type: integer
- type: boolean
description:
type: string
type:
@ -132,7 +135,10 @@ properties:
enum:
type: array
items:
type: string
anyOf:
- type: string
- type: integer
- type: boolean
in_path:
type: boolean
format:

View file

@ -1,8 +1,10 @@
version: v0.1
listener:
address: 127.0.0.1
port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized place to manage LLMs: access keys, retry logic, failover, and limits
llm_providers:

View file

@ -1,8 +1,10 @@
version: v0.1
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized place to manage LLMs: access keys, retry logic, failover, and limits
llm_providers:

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 127.0.0.1
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
endpoints:
rag_energy_source_agent:

View file

@ -1,8 +1,10 @@
version: v0.1
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized place to manage LLMs: access keys, retry logic, failover, and limits
llm_providers:

View file

@ -1,11 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY

View file

@ -1,10 +1,11 @@
version: "0.1-beta"
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
endpoints:
weather_forecast_service:

View file

@ -1,10 +1,11 @@
version: "0.1-beta"
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o-mini

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:

View file

@ -1,8 +1,10 @@
version: v0.1
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
overrides:
optimize_context_window: true

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from an application/json or text/plain Content-Type in the HTTP request
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized place to manage LLMs: access keys, retry logic, failover, and limits
llm_providers:
@ -51,11 +52,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch performs round-robin load balancing across the different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
default: true
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider: mistral
provider_interface: openai
model: ministral-3b-latest
Step 2. Start arch gateway

View file

@ -1,10 +1,11 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from an application/json or text/plain Content-Type in the HTTP request
message_format: huggingface
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
# Centralized place to manage LLMs: access keys, retry logic, failover, and limits
llm_providers:
@ -53,11 +54,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
# Arch performs round-robin load balancing across the different endpoints, managed via the cluster subsystem.
endpoints:
app_server:

View file

@ -33,14 +33,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
rate_limits:
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
http_header:
name: Authorization
value: "" # Empty value means each separate value has a separate limit
limit:
tokens: 100000 # Tokens per unit
unit: minute
- name: Mistral8x7b
provider_interface: openai
@ -96,11 +88,6 @@ prompt_targets:
default: false
enum: [true, false]
error_target:
endpoint:
name: error_target_1
path: /error
tracing:
# Sampling rate. Note: by default, Arch works with OpenTelemetry-compatible tracing.
sampling_rate: 0.1

View file

@ -1,10 +1,11 @@
version: "0.1-beta"
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 30s
endpoints:
weather_forecast_service: