mirror of
https://github.com/katanemo/plano.git
synced 2026-06-05 14:45:15 +02:00
Update arch_config and add tests for arch config file (#407)
This commit is contained in:
parent
d0a783cca8
commit
e40b13be05
31 changed files with 379 additions and 212 deletions
31
.github/workflows/validate_arch_config.yml
vendored
Normal file
31
.github/workflows/validate_arch_config.yml
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
name: arch config tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
validate_arch_config:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: .
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
- name: build arch docker image
|
||||
run: |
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw
|
||||
|
||||
- name: validate arch config
|
||||
run: |
|
||||
bash arch/validate_arch_config.sh
|
||||
|
|
@ -3,21 +3,38 @@ type: object
|
|||
properties:
|
||||
version:
|
||||
type: string
|
||||
listener:
|
||||
listeners:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
connect_timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- address
|
||||
- port
|
||||
properties:
|
||||
ingress_traffic:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
enum:
|
||||
- openai
|
||||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
egress_traffic:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
enum:
|
||||
- openai
|
||||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
endpoints:
|
||||
type: object
|
||||
patternProperties:
|
||||
|
|
@ -107,7 +124,10 @@ properties:
|
|||
required:
|
||||
type: boolean
|
||||
default:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: boolean
|
||||
description:
|
||||
type: string
|
||||
type:
|
||||
|
|
@ -115,7 +135,10 @@ properties:
|
|||
enum:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: boolean
|
||||
in_path:
|
||||
type: boolean
|
||||
format:
|
||||
|
|
@ -224,5 +247,4 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- version
|
||||
- listener
|
||||
- llm_providers
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ stats_config:
|
|||
- 180000
|
||||
static_resources:
|
||||
listeners:
|
||||
- name: arch_listener_http
|
||||
- name: ingress_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 10000
|
||||
address: {{ prompt_gateway_listener.address }}
|
||||
port_value: {{ prompt_gateway_listener.port }}
|
||||
traffic_direction: INBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
|
|
@ -55,7 +55,7 @@ static_resources:
|
|||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_listener_http
|
||||
stat_prefix: ingress_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -76,13 +76,13 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_prompt_gateway_listener
|
||||
timeout: 60s
|
||||
timeout: {{ prompt_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: arch_prompt_gateway_listener
|
||||
- name: ingress_traffic_prompt
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -104,11 +104,11 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: prompt_processor
|
||||
service_name: ingress_traffic
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_prompt_gateway_listener
|
||||
stat_prefix: ingress_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -201,7 +201,7 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: arch_internal
|
||||
- name: egress_api_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -223,11 +223,11 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: prompt_processor
|
||||
service_name: egress_api_traffic
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_internal
|
||||
stat_prefix: egress_api_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -273,13 +273,12 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
- name: arch_listener_http_llm
|
||||
- name: egress_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 12000
|
||||
traffic_direction: INBOUND
|
||||
address: {{ llm_gateway_listener.address }}
|
||||
port_value: {{ llm_gateway_listener.port }}
|
||||
traffic_direction: OUTBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
|
|
@ -300,7 +299,7 @@ static_resources:
|
|||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_listener_http
|
||||
stat_prefix: egress_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -321,14 +320,13 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_listener_llm
|
||||
timeout: 60s
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
- name: arch_listener_llm
|
||||
- name: egress_traffic_llm
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
|
|
@ -349,11 +347,11 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: llm_gateway
|
||||
service_name: egress_traffic_llm
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: arch_listener_http
|
||||
stat_prefix: egress_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
|
|
@ -443,7 +441,7 @@ static_resources:
|
|||
|
||||
clusters:
|
||||
- name: openai
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -467,7 +465,7 @@ static_resources:
|
|||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
- name: mistral
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -488,7 +486,7 @@ static_resources:
|
|||
sni: api.mistral.ai
|
||||
{% for internal_cluster in ["arch_fc", "model_server"] %}
|
||||
- name: {{ internal_cluster }}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -504,7 +502,7 @@ static_resources:
|
|||
hostname: {{ internal_cluster }}
|
||||
{% endfor %}
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -523,7 +521,7 @@ static_resources:
|
|||
{% if cluster.connect_timeout -%}
|
||||
connect_timeout: {{ cluster.connect_timeout }}
|
||||
{% else -%}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
{% endif -%}
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
|
|
@ -557,7 +555,7 @@ static_resources:
|
|||
|
||||
{% for local_llm_provider in local_llms %}
|
||||
- name: {{ local_llm_provider.name }}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -589,7 +587,7 @@ static_resources:
|
|||
|
||||
{% endfor %}
|
||||
- name: arch_internal
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -605,7 +603,7 @@ static_resources:
|
|||
hostname: arch_internal
|
||||
|
||||
- name: arch_prompt_gateway_listener
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -621,7 +619,7 @@ static_resources:
|
|||
hostname: arch_prompt_gateway_listener
|
||||
|
||||
- name: arch_listener_llm
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
|
|||
|
|
@ -104,7 +104,27 @@ def validate_and_render_schema():
|
|||
arch_config_string = yaml.dump(config_yaml)
|
||||
arch_llm_config_string = yaml.dump(config_yaml)
|
||||
|
||||
prompt_gateway_listener = config_yaml.get("listeners", {}).get(
|
||||
"ingress_traffic", {}
|
||||
)
|
||||
if prompt_gateway_listener.get("port") == None:
|
||||
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
|
||||
if prompt_gateway_listener.get("address") == None:
|
||||
prompt_gateway_listener["address"] = "127.0.0.1"
|
||||
if prompt_gateway_listener.get("timeout") == None:
|
||||
prompt_gateway_listener["timeout"] = "10s"
|
||||
|
||||
llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
|
||||
if llm_gateway_listener.get("port") == None:
|
||||
llm_gateway_listener["port"] = 12000 # default port for llm gateway
|
||||
if llm_gateway_listener.get("address") == None:
|
||||
llm_gateway_listener["address"] = "127.0.0.1"
|
||||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway_listener,
|
||||
"llm_gateway_listener": llm_gateway_listener,
|
||||
"arch_config": arch_config_string,
|
||||
"arch_llm_config": arch_llm_config_string,
|
||||
"arch_clusters": inferred_clusters,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ import subprocess
|
|||
import os
|
||||
import time
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
from cli.utils import getLogger
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_NAME,
|
||||
|
|
@ -22,6 +24,29 @@ from cli.docker_cli import (
|
|||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def _get_gateway_ports(arch_config_file: str) -> tuple:
|
||||
PROMPT_GATEWAY_DEFAULT_PORT = 10000
|
||||
LLM_GATEWAY_DEFAULT_PORT = 12000
|
||||
|
||||
# parse arch_config_file yaml file and get prompt_gateway_port
|
||||
arch_config_dict = {}
|
||||
with open(arch_config_file) as f:
|
||||
arch_config_dict = yaml.safe_load(f)
|
||||
|
||||
prompt_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("ingress_traffic", {})
|
||||
.get("port", PROMPT_GATEWAY_DEFAULT_PORT)
|
||||
)
|
||||
llm_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("egress_traffic", {})
|
||||
.get("port", LLM_GATEWAY_DEFAULT_PORT)
|
||||
)
|
||||
|
||||
return prompt_gateway_port, llm_gateway_port
|
||||
|
||||
|
||||
def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
||||
"""
|
||||
Start Docker Compose in detached mode and stream logs until services are healthy.
|
||||
|
|
@ -39,8 +64,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
docker_stop_container(ARCHGW_DOCKER_NAME)
|
||||
docker_remove_container(ARCHGW_DOCKER_NAME)
|
||||
|
||||
prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file)
|
||||
|
||||
return_code, _, archgw_stderr = docker_start_archgw_detached(
|
||||
arch_config_file, os.path.expanduser("~/archgw_logs"), env
|
||||
arch_config_file,
|
||||
os.path.expanduser("~/archgw_logs"),
|
||||
env,
|
||||
prompt_gateway_port,
|
||||
llm_gateway_port,
|
||||
)
|
||||
if return_code != 0:
|
||||
log.info("Failed to start arch gateway: " + str(return_code))
|
||||
|
|
@ -50,7 +81,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
start_time = time.time()
|
||||
while True:
|
||||
health_check_status = health_check_endpoint(
|
||||
"http://localhost:10000/healthz"
|
||||
f"http://localhost:{prompt_gateway_port}/healthz"
|
||||
)
|
||||
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
current_time = time.time()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import subprocess
|
||||
import json
|
||||
import sys
|
||||
import requests # Add this import
|
||||
import requests
|
||||
|
||||
from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
|
||||
from cli.utils import getLogger
|
||||
|
|
@ -33,11 +33,19 @@ def docker_remove_container(container: str) -> str:
|
|||
|
||||
|
||||
def docker_start_archgw_detached(
|
||||
arch_config_file: str, logs_path_abs: str, env: dict
|
||||
arch_config_file: str,
|
||||
logs_path_abs: str,
|
||||
env: dict,
|
||||
prompt_gateway_port,
|
||||
llm_gateway_port,
|
||||
) -> str:
|
||||
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
|
||||
|
||||
port_mappings = ["10000:10000", "12000:12000", "9901:19901"]
|
||||
port_mappings = [
|
||||
f"{prompt_gateway_port}:{prompt_gateway_port}",
|
||||
f"{llm_gateway_port}:{llm_gateway_port}",
|
||||
"9901:19901",
|
||||
]
|
||||
port_mappings_args = [item for port in port_mappings for item in ("-p", port)]
|
||||
|
||||
volume_mappings = [
|
||||
|
|
|
|||
65
arch/tools/poetry.lock
generated
65
arch/tools/poetry.lock
generated
|
|
@ -318,6 +318,28 @@ files = [
|
|||
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docker"
|
||||
version = "7.1.0"
|
||||
description = "A Python library for the Docker Engine API."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"},
|
||||
{file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
|
||||
requests = ">=2.26.0"
|
||||
urllib3 = ">=1.26.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"]
|
||||
docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
|
||||
ssh = ["paramiko (>=2.4.3)"]
|
||||
websockets = ["websocket-client (>=1.3.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.2.2"
|
||||
|
|
@ -1602,6 +1624,20 @@ files = [
|
|||
[package.dependencies]
|
||||
six = ">=1.5"
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.0.1"
|
||||
description = "Read key-value pairs from a .env file and set them as environment variables"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
|
||||
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
cli = ["click (>=5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2025.1"
|
||||
|
|
@ -1613,6 +1649,33 @@ files = [
|
|||
{file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pywin32"
|
||||
version = "308"
|
||||
description = "Python for Window Extensions"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
|
||||
{file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
|
||||
{file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"},
|
||||
{file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"},
|
||||
{file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"},
|
||||
{file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"},
|
||||
{file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"},
|
||||
{file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"},
|
||||
{file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"},
|
||||
{file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"},
|
||||
{file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"},
|
||||
{file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"},
|
||||
{file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"},
|
||||
{file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"},
|
||||
{file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"},
|
||||
{file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"},
|
||||
{file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"},
|
||||
{file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.2"
|
||||
|
|
@ -2481,4 +2544,4 @@ type = ["pytest-mypy"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "d4fb144073d4f8abcd8972892545d8ce47692d303ffa63dfe9b9bcdd0aea96f2"
|
||||
content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ click = "^8.1.7"
|
|||
jinja2 = "^3.1.4"
|
||||
jsonschema = "^4.23.0"
|
||||
setuptools = "75.5.0"
|
||||
docker = "^7.1.0"
|
||||
python-dotenv = "^1.0.1"
|
||||
pyyaml = "^6.0.2"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
archgw = "cli.main:main"
|
||||
|
|
|
|||
20
arch/validate_arch_config.sh
Normal file
20
arch/validate_arch_config.sh
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
|
||||
|
||||
failed_files=()
|
||||
|
||||
for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do
|
||||
echo "Validating $file..."
|
||||
if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
|
||||
echo "Validation failed for $file"
|
||||
failed_files+=("$file")
|
||||
fi
|
||||
done
|
||||
|
||||
# Print summary of failed files
|
||||
if [ ${#failed_files[@]} -ne 0 ]; then
|
||||
echo -e "\nValidation failed for the following files:"
|
||||
printf '%s\n' "${failed_files[@]}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "\nAll files validated successfully!"
|
||||
fi
|
||||
|
|
@ -9,7 +9,6 @@ use crate::api::open_ai::{
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Configuration {
|
||||
pub version: String,
|
||||
pub listener: Listener,
|
||||
pub endpoints: Option<HashMap<String, Endpoint>>,
|
||||
pub llm_providers: Vec<LlmProvider>,
|
||||
pub overrides: Option<Overrides>,
|
||||
|
|
@ -48,32 +47,6 @@ pub struct ErrorTargetDetail {
|
|||
pub endpoint: Option<EndpointDetails>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Listener {
|
||||
pub address: String,
|
||||
pub port: u16,
|
||||
pub message_format: MessageFormat,
|
||||
// pub connect_timeout: Option<DurationString>,
|
||||
}
|
||||
|
||||
impl Default for Listener {
|
||||
fn default() -> Self {
|
||||
Listener {
|
||||
address: "".to_string(),
|
||||
port: 0,
|
||||
message_format: MessageFormat::default(),
|
||||
// connect_timeout: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub enum MessageFormat {
|
||||
#[serde(rename = "huggingface")]
|
||||
#[default]
|
||||
Huggingface,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct PromptGuards {
|
||||
pub input_guards: HashMap<GuardType, GuardOptions>,
|
||||
|
|
@ -353,16 +326,6 @@ mod test {
|
|||
Some("/agent/summary".to_string())
|
||||
);
|
||||
|
||||
let error_target = config.error_target.as_ref().unwrap();
|
||||
assert_eq!(
|
||||
error_target.endpoint.as_ref().unwrap().name,
|
||||
"error_target_1".to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
error_target.endpoint.as_ref().unwrap().path,
|
||||
Some("/error".to_string())
|
||||
);
|
||||
|
||||
let tracing = config.tracing.as_ref().unwrap();
|
||||
assert_eq!(tracing.sampling_rate.unwrap(), 0.1);
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
|
|||
pub const USER_ROLE: &str = "user";
|
||||
pub const TOOL_ROLE: &str = "tool";
|
||||
pub const ASSISTANT_ROLE: &str = "assistant";
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const MESSAGES_KEY: &str = "messages";
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ impl TraceData {
|
|||
attributes: vec![Attribute {
|
||||
key: "service.name".to_string(),
|
||||
value: AttributeValue {
|
||||
string_value: Some("upstream-llm".to_string()),
|
||||
string_value: Some("egress_llm_traffic".to_string()),
|
||||
},
|
||||
}],
|
||||
};
|
||||
|
|
|
|||
|
|
@ -381,7 +381,7 @@ impl HttpContext for StreamContext {
|
|||
Ok(traceparent) => {
|
||||
let mut trace_data = common::tracing::TraceData::new();
|
||||
let mut llm_span = Span::new(
|
||||
"upstream_llm_time".to_string(),
|
||||
"egress_traffic".to_string(),
|
||||
Some(traceparent.trace_id),
|
||||
Some(traceparent.parent_id),
|
||||
self.request_body_sent_time.unwrap(),
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@ use common::{
|
|||
consts::{
|
||||
ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
|
||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
|
||||
MODEL_SERVER_NAME, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
||||
MODEL_SERVER_NAME, MODEL_SERVER_REQUEST_TIMEOUT_MS, REQUEST_ID_HEADER, TOOL_ROLE,
|
||||
TRACE_PARENT_HEADER, USER_ROLE,
|
||||
},
|
||||
errors::ServerError,
|
||||
http::{CallArgs, Client},
|
||||
|
|
@ -144,7 +145,10 @@ impl HttpContext for StreamContext {
|
|||
if metadata.is_none() {
|
||||
metadata = Some(HashMap::new());
|
||||
}
|
||||
metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string());
|
||||
metadata
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.insert("optimize_context_window".to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -170,12 +174,15 @@ impl HttpContext for StreamContext {
|
|||
debug!("sending request to model server");
|
||||
trace!("request body: {}", json_data);
|
||||
|
||||
let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string();
|
||||
|
||||
let mut headers = vec![
|
||||
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
|
||||
(":method", "POST"),
|
||||
(":path", "/function_calling"),
|
||||
("content-type", "application/json"),
|
||||
(":authority", MODEL_SERVER_NAME),
|
||||
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
|
||||
];
|
||||
|
||||
if self.request_id.is_some() {
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@ use common::api::open_ai::{
|
|||
};
|
||||
use common::configuration::{Overrides, PromptTarget, Tracing};
|
||||
use common::consts::{
|
||||
ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME,
|
||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE,
|
||||
TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
||||
API_REQUEST_TIMEOUT_MS, ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME,
|
||||
ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_TARGET_REQUEST_TIMEOUT_MS, MESSAGES_KEY,
|
||||
REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
|
||||
};
|
||||
use common::errors::ServerError;
|
||||
use common::http::{CallArgs, Client};
|
||||
|
|
@ -89,7 +89,7 @@ impl StreamContext {
|
|||
streaming_response: false,
|
||||
user_prompt: None,
|
||||
is_chat_completions_request: false,
|
||||
overrides: overrides,
|
||||
overrides,
|
||||
request_id: None,
|
||||
traceparent: None,
|
||||
_tracing: tracing,
|
||||
|
|
@ -160,7 +160,7 @@ impl StreamContext {
|
|||
callout_context.request_body.messages.clone(),
|
||||
);
|
||||
let arch_messages_json = serde_json::to_string(¶ms).unwrap();
|
||||
let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string();
|
||||
let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string();
|
||||
|
||||
let mut headers = vec![
|
||||
(":method", "POST"),
|
||||
|
|
@ -302,6 +302,8 @@ impl StreamContext {
|
|||
}
|
||||
};
|
||||
|
||||
let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
|
||||
|
||||
let http_method_str = http_method.to_string();
|
||||
let mut headers: HashMap<_, _> = [
|
||||
(ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
|
||||
|
|
@ -310,6 +312,7 @@ impl StreamContext {
|
|||
(":authority", endpoint_details.name.as_str()),
|
||||
("content-type", "application/json"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
|
|
|||
|
|
@ -81,10 +81,11 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
|
|||
(":path", "/function_calling"),
|
||||
("content-type", "application/json"),
|
||||
(":authority", "model_server"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "30000"),
|
||||
]),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Some(5000),
|
||||
)
|
||||
.returning(Some(1))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
|
|
@ -387,10 +388,11 @@ fn prompt_gateway_request_to_llm_gateway() {
|
|||
(":authority", "api_server"),
|
||||
("x-envoy-max-retries", "3"),
|
||||
(":path", "/weather"),
|
||||
("x-envoy-upstream-rq-timeout-ms", "30000"),
|
||||
]),
|
||||
Some(expected_body),
|
||||
None,
|
||||
None,
|
||||
Some(5000),
|
||||
)
|
||||
.returning(Some(2))
|
||||
.expect_metric_increment("active_http_calls", 1)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
rag_energy_source_agent:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
weather_forecast_service:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o-mini
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
version: v0.1
|
||||
listener:
|
||||
address: 127.0.0.1
|
||||
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
overrides:
|
||||
optimize_context_window: true
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
|
|
@ -13,7 +14,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
|
|
@ -52,11 +52,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
app_server:
|
||||
|
|
|
|||
|
|
@ -42,11 +42,12 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
|
||||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
|
|
@ -144,22 +145,23 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
|
||||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
provider_interface: openai
|
||||
model: gpt-4o
|
||||
default: true
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: mistral
|
||||
provider_interface: openai
|
||||
model: ministral-3b-latest
|
||||
|
||||
Step 2. Start arch gateway
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
|
||||
message_format: huggingface
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
|
||||
llm_providers:
|
||||
|
|
@ -13,7 +14,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
|
|
@ -54,11 +54,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
app_server:
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
|
||||
message_format: huggingface
|
||||
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
|
||||
tls_certificates:
|
||||
- certificate_chain:
|
||||
filename: /etc/certs/cert.pem
|
||||
private_key:
|
||||
filename: /etc/certs/key.pem
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
@ -35,15 +35,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
rate_limits:
|
||||
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
|
||||
http_header:
|
||||
name: Authorization
|
||||
value: "" # Empty value means each separate value has a separate limit
|
||||
limit:
|
||||
tokens: 100000 # Tokens per unit
|
||||
unit: minute
|
||||
|
||||
- name: Mistral8x7b
|
||||
provider_interface: openai
|
||||
|
|
@ -99,11 +90,6 @@ prompt_targets:
|
|||
default: false
|
||||
enum: [true, false]
|
||||
|
||||
error_target:
|
||||
endpoint:
|
||||
name: error_target_1
|
||||
path: /error
|
||||
|
||||
tracing:
|
||||
# sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
|
||||
sampling_rate: 0.1
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
endpoints:
|
||||
weather_forecast_service:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue