Update arch_config and add tests for arch config file (#407)

This commit is contained in:
Adil Hafeez 2025-02-14 19:28:10 -08:00 committed by GitHub
parent d0a783cca8
commit e40b13be05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 379 additions and 212 deletions

View file

@ -3,21 +3,38 @@ type: object
properties:
version:
type: string
listener:
listeners:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
connect_timeout:
type: string
additionalProperties: false
required:
- address
- port
properties:
ingress_traffic:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
enum:
- openai
timeout:
type: string
additionalProperties: false
egress_traffic:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
enum:
- openai
timeout:
type: string
additionalProperties: false
endpoints:
type: object
patternProperties:
@ -107,7 +124,10 @@ properties:
required:
type: boolean
default:
type: string
anyOf:
- type: string
- type: integer
- type: boolean
description:
type: string
type:
@ -115,7 +135,10 @@ properties:
enum:
type: array
items:
type: string
anyOf:
- type: string
- type: integer
- type: boolean
in_path:
type: boolean
format:
@ -224,5 +247,4 @@ properties:
additionalProperties: false
required:
- version
- listener
- llm_providers

View file

@ -29,11 +29,11 @@ stats_config:
- 180000
static_resources:
listeners:
- name: arch_listener_http
- name: ingress_traffic
address:
socket_address:
address: 0.0.0.0
port_value: 10000
address: {{ prompt_gateway_listener.address }}
port_value: {{ prompt_gateway_listener.port }}
traffic_direction: INBOUND
filter_chains:
- filters:
@ -55,7 +55,7 @@ static_resources:
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_listener_http
stat_prefix: ingress_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -76,13 +76,13 @@ static_resources:
route:
auto_host_rewrite: true
cluster: arch_prompt_gateway_listener
timeout: 60s
timeout: {{ prompt_gateway_listener.timeout }}
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_prompt_gateway_listener
- name: ingress_traffic_prompt
address:
socket_address:
address: 0.0.0.0
@ -104,11 +104,11 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: prompt_processor
service_name: ingress_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_prompt_gateway_listener
stat_prefix: ingress_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -201,7 +201,7 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_internal
- name: egress_api_traffic
address:
socket_address:
address: 0.0.0.0
@ -223,11 +223,11 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: prompt_processor
service_name: egress_api_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_internal
stat_prefix: egress_api_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -273,13 +273,12 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_http_llm
- name: egress_traffic
address:
socket_address:
address: 0.0.0.0
port_value: 12000
traffic_direction: INBOUND
address: {{ llm_gateway_listener.address }}
port_value: {{ llm_gateway_listener.port }}
traffic_direction: OUTBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
@ -300,7 +299,7 @@ static_resources:
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_listener_http
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -321,14 +320,13 @@ static_resources:
route:
auto_host_rewrite: true
cluster: arch_listener_llm
timeout: 60s
timeout: {{ llm_gateway_listener.timeout }}
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
- name: egress_traffic_llm
address:
socket_address:
address: 0.0.0.0
@ -349,11 +347,11 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: llm_gateway
service_name: egress_traffic_llm
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_listener_http
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -443,7 +441,7 @@ static_resources:
clusters:
- name: openai
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -467,7 +465,7 @@ static_resources:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: mistral
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -488,7 +486,7 @@ static_resources:
sni: api.mistral.ai
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 5s
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -504,7 +502,7 @@ static_resources:
hostname: {{ internal_cluster }}
{% endfor %}
- name: mistral_7b_instruct
connect_timeout: 5s
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -523,7 +521,7 @@ static_resources:
{% if cluster.connect_timeout -%}
connect_timeout: {{ cluster.connect_timeout }}
{% else -%}
connect_timeout: 5s
connect_timeout: 0.5s
{% endif -%}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
@ -557,7 +555,7 @@ static_resources:
{% for local_llm_provider in local_llms %}
- name: {{ local_llm_provider.name }}
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -589,7 +587,7 @@ static_resources:
{% endfor %}
- name: arch_internal
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -605,7 +603,7 @@ static_resources:
hostname: arch_internal
- name: arch_prompt_gateway_listener
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -621,7 +619,7 @@ static_resources:
hostname: arch_prompt_gateway_listener
- name: arch_listener_llm
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN

View file

@ -104,7 +104,27 @@ def validate_and_render_schema():
arch_config_string = yaml.dump(config_yaml)
arch_llm_config_string = yaml.dump(config_yaml)
prompt_gateway_listener = config_yaml.get("listeners", {}).get(
"ingress_traffic", {}
)
if prompt_gateway_listener.get("port") == None:
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
if prompt_gateway_listener.get("address") == None:
prompt_gateway_listener["address"] = "127.0.0.1"
if prompt_gateway_listener.get("timeout") == None:
prompt_gateway_listener["timeout"] = "10s"
llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
if llm_gateway_listener.get("port") == None:
llm_gateway_listener["port"] = 12000 # default port for llm gateway
if llm_gateway_listener.get("address") == None:
llm_gateway_listener["address"] = "127.0.0.1"
if llm_gateway_listener.get("timeout") == None:
llm_gateway_listener["timeout"] = "10s"
data = {
"prompt_gateway_listener": prompt_gateway_listener,
"llm_gateway_listener": llm_gateway_listener,
"arch_config": arch_config_string,
"arch_llm_config": arch_llm_config_string,
"arch_clusters": inferred_clusters,

View file

@ -2,6 +2,8 @@ import subprocess
import os
import time
import sys
import yaml
from cli.utils import getLogger
from cli.consts import (
ARCHGW_DOCKER_NAME,
@ -22,6 +24,29 @@ from cli.docker_cli import (
log = getLogger(__name__)
def _get_gateway_ports(arch_config_file: str) -> tuple:
PROMPT_GATEWAY_DEFAULT_PORT = 10000
LLM_GATEWAY_DEFAULT_PORT = 12000
# parse arch_config_file yaml file and get prompt_gateway_port
arch_config_dict = {}
with open(arch_config_file) as f:
arch_config_dict = yaml.safe_load(f)
prompt_gateway_port = (
arch_config_dict.get("listeners", {})
.get("ingress_traffic", {})
.get("port", PROMPT_GATEWAY_DEFAULT_PORT)
)
llm_gateway_port = (
arch_config_dict.get("listeners", {})
.get("egress_traffic", {})
.get("port", LLM_GATEWAY_DEFAULT_PORT)
)
return prompt_gateway_port, llm_gateway_port
def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
"""
Start Docker Compose in detached mode and stream logs until services are healthy.
@ -39,8 +64,14 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
docker_stop_container(ARCHGW_DOCKER_NAME)
docker_remove_container(ARCHGW_DOCKER_NAME)
prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file)
return_code, _, archgw_stderr = docker_start_archgw_detached(
arch_config_file, os.path.expanduser("~/archgw_logs"), env
arch_config_file,
os.path.expanduser("~/archgw_logs"),
env,
prompt_gateway_port,
llm_gateway_port,
)
if return_code != 0:
log.info("Failed to start arch gateway: " + str(return_code))
@ -50,7 +81,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
start_time = time.time()
while True:
health_check_status = health_check_endpoint(
"http://localhost:10000/healthz"
f"http://localhost:{prompt_gateway_port}/healthz"
)
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
current_time = time.time()

View file

@ -1,7 +1,7 @@
import subprocess
import json
import sys
import requests # Add this import
import requests
from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
from cli.utils import getLogger
@ -33,11 +33,19 @@ def docker_remove_container(container: str) -> str:
def docker_start_archgw_detached(
arch_config_file: str, logs_path_abs: str, env: dict
arch_config_file: str,
logs_path_abs: str,
env: dict,
prompt_gateway_port,
llm_gateway_port,
) -> str:
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
port_mappings = ["10000:10000", "12000:12000", "9901:19901"]
port_mappings = [
f"{prompt_gateway_port}:{prompt_gateway_port}",
f"{llm_gateway_port}:{llm_gateway_port}",
"9901:19901",
]
port_mappings_args = [item for port in port_mappings for item in ("-p", port)]
volume_mappings = [

65
arch/tools/poetry.lock generated
View file

@ -318,6 +318,28 @@ files = [
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
]
[[package]]
name = "docker"
version = "7.1.0"
description = "A Python library for the Docker Engine API."
optional = false
python-versions = ">=3.8"
files = [
{file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"},
{file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"},
]
[package.dependencies]
pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
requests = ">=2.26.0"
urllib3 = ">=1.26.0"
[package.extras]
dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"]
docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
ssh = ["paramiko (>=2.4.3)"]
websockets = ["websocket-client (>=1.3.0)"]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
@ -1602,6 +1624,20 @@ files = [
[package.dependencies]
six = ">=1.5"
[[package]]
name = "python-dotenv"
version = "1.0.1"
description = "Read key-value pairs from a .env file and set them as environment variables"
optional = false
python-versions = ">=3.8"
files = [
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
]
[package.extras]
cli = ["click (>=5.0)"]
[[package]]
name = "pytz"
version = "2025.1"
@ -1613,6 +1649,33 @@ files = [
{file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
]
[[package]]
name = "pywin32"
version = "308"
description = "Python for Window Extensions"
optional = false
python-versions = "*"
files = [
{file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
{file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
{file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"},
{file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"},
{file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"},
{file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"},
{file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"},
{file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"},
{file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"},
{file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"},
{file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"},
{file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"},
{file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"},
{file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"},
{file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"},
{file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"},
{file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"},
{file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
]
[[package]]
name = "pyyaml"
version = "6.0.2"
@ -2481,4 +2544,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "d4fb144073d4f8abcd8972892545d8ce47692d303ffa63dfe9b9bcdd0aea96f2"
content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"

View file

@ -15,6 +15,9 @@ click = "^8.1.7"
jinja2 = "^3.1.4"
jsonschema = "^4.23.0"
setuptools = "75.5.0"
docker = "^7.1.0"
python-dotenv = "^1.0.1"
pyyaml = "^6.0.2"
[tool.poetry.scripts]
archgw = "cli.main:main"

View file

@ -0,0 +1,20 @@
#!/bin/bash
failed_files=()
for file in $(find . -name arch_config.yaml -o -name arch_config_full_reference.yaml); do
echo "Validating $file..."
if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
echo "Validation failed for $file"
failed_files+=("$file")
fi
done
# Print summary of failed files
if [ ${#failed_files[@]} -ne 0 ]; then
echo -e "\nValidation failed for the following files:"
printf '%s\n' "${failed_files[@]}"
exit 1
else
echo -e "\nAll files validated successfully!"
fi