Merge branch 'main' into musa/www

This commit is contained in:
Musa 2025-12-18 14:53:56 -08:00
commit a6f9ca3594
189 changed files with 21252 additions and 14516 deletions

View file

@ -14,6 +14,38 @@ properties:
type: array
items:
type: object
properties:
id:
type: string
url:
type: string
additionalProperties: false
required:
- id
- url
filters:
type: array
items:
type: object
properties:
id:
type: string
url:
type: string
type:
type: string
enum:
- mcp
transport:
type: string
enum:
- streamable-http
tool:
type: string
additionalProperties: false
required:
- id
- url
listeners:
oneOf:
- type: array
@ -331,6 +363,31 @@ properties:
model:
type: string
additionalProperties: false
state_storage:
type: object
properties:
type:
type: string
enum:
- memory
- postgres
connection_string:
type: string
description: Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax.
additionalProperties: false
required:
- type
# Note: connection_string is conditionally required based on type
# If type is 'postgres', connection_string must be provided
# If type is 'memory', connection_string is not needed
allOf:
- if:
properties:
type:
const: postgres
then:
required:
- connection_string
prompt_guards:
type: object
properties:

View file

@ -22,4 +22,3 @@ services:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}

View file

@ -51,11 +51,11 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: archgw(inbound)
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: ingress_traffic
stat_prefix: plano(inbound)
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
@ -95,21 +95,6 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: ingress_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: ingress_traffic
codec_type: AUTO
scheme_header_transformation:
@ -221,7 +206,7 @@ static_resources:
- name: outbound_api_traffic
address:
socket_address:
address: 0.0.0.0
address: 127.0.0.1
port_value: 11000
traffic_direction: OUTBOUND
filter_chains:
@ -229,21 +214,21 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: outbound_api_traffic
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
# {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
# generate_request_id: true
# tracing:
# provider:
# name: envoy.tracers.opentelemetry
# typed_config:
# "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
# grpc_service:
# envoy_grpc:
# cluster_name: opentelemetry_collector
# timeout: 0.250s
# service_name: tools
# random_sampling:
# value: {{ arch_tracing.random_sampling }}
# {% endif %}
stat_prefix: outbound_api_traffic
codec_type: AUTO
scheme_header_transformation:
@ -262,19 +247,16 @@ static_resources:
domains:
- "*"
routes:
{% for internal_cluster in ["arch_fc", "model_server"] %}
- match:
prefix: "/"
headers:
- name: "x-arch-upstream"
string_match:
exact: {{ internal_cluster }}
exact: bright_staff
route:
auto_host_rewrite: true
cluster: {{ internal_cluster }}
cluster: bright_staff
timeout: 300s
{% endfor %}
{% for cluster_name, cluster in arch_clusters.items() %}
- match:
prefix: "/"
@ -317,7 +299,7 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: arch_gateway
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
@ -416,7 +398,7 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: archgw(outbound)
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
@ -487,6 +469,50 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: otel_collector_proxy
address:
socket_address:
address: 127.0.0.1
port_value: 9903
traffic_direction: OUTBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: otel_proxy
codec_type: AUTO
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: "/var/log/access_otel.log"
format: |
[%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
route_config:
name: otel_route
virtual_hosts:
- name: otel_backend
domains: ["*"]
routes:
- match:
prefix: "/v1/traces"
route:
cluster: opentelemetry_collector_http
timeout: 5s
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset"
num_retries: 3
per_try_timeout: 2s
host_selection_retry_max_attempts: 5
retriable_status_codes: [500, 502, 503, 504]
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% endif %}
- name: egress_traffic_llm
address:
socket_address:
@ -599,7 +625,7 @@ static_resources:
clusters:
- name: arch
connect_timeout: 0.5s
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -868,24 +894,6 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: {{ internal_cluster }}
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 51000
hostname: {{ internal_cluster }}
{% endfor %}
- name: mistral_7b_instruct
connect_timeout: 0.5s
type: STRICT_DNS
@ -1035,7 +1043,6 @@ static_resources:
port_value: 12001
hostname: arch_listener_llm
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: opentelemetry_collector
type: STRICT_DNS
@ -1069,4 +1076,19 @@ static_resources:
socket_address:
address: host.docker.internal
port_value: 4318
# Circuit breaker configuration to prevent overwhelming OTEL collector
circuit_breakers:
thresholds:
- priority: DEFAULT
max_connections: 100
max_pending_requests: 100
max_requests: 100
max_retries: 3
# Health checking and outlier detection
outlier_detection:
consecutive_5xx: 5
interval: 10s
base_ejection_time: 30s
max_ejection_percent: 50
enforcing_consecutive_5xx: 100
{% endif %}

View file

@ -2,7 +2,7 @@
nodaemon=true
[program:brightstaff]
command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
command=sh -c "envsubst < /app/arch_config_rendered.yaml > /app/arch_config_rendered.env_sub.yaml && RUST_LOG=debug ARCH_CONFIG_PATH_RENDERED=/app/arch_config_rendered.env_sub.yaml /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
stdout_logfile=/dev/stdout
redirect_stderr=true
stdout_logfile_maxbytes=0

View file

@ -19,7 +19,7 @@ source venv/bin/activate
### Step 3: Run the build script
```bash
pip install archgw==0.3.18
pip install archgw==0.3.22
```
## Uninstall Instructions: archgw CLI
@ -56,15 +56,8 @@ poetry install
archgw build
```
### Step 5: download models
This will help download models so model_server can load faster. This should be done once.
```bash
archgw download-models
```
### Logs
`archgw` command can also view logs from gateway and model_server. Use following command to view logs,
`archgw` command can also view logs from the gateway. Use following command to view logs,
```bash
archgw logs --follow

View file

@ -13,10 +13,10 @@ SUPPORTED_PROVIDERS_WITH_BASE_URL = [
"ollama",
"qwen",
"amazon_bedrock",
"arch",
]
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
"arch",
"deepseek",
"groq",
"mistral",
@ -101,8 +101,17 @@ def validate_and_render_schema():
# Process agents section and convert to endpoints
agents = config_yaml.get("agents", [])
for agent in agents:
filters = config_yaml.get("filters", [])
agents_combined = agents + filters
agent_id_keys = set()
for agent in agents_combined:
agent_id = agent.get("id")
if agent_id in agent_id_keys:
raise Exception(
f"Duplicate agent id {agent_id}, please provide unique id for each agent"
)
agent_id_keys.add(agent_id)
agent_endpoint = agent.get("url")
if agent_id and agent_endpoint:
@ -304,6 +313,16 @@ def validate_and_render_schema():
}
)
# Always add arch-function model provider if not already defined
if "arch-function" not in model_provider_name_set:
updated_model_providers.append(
{
"name": "arch-function",
"provider_interface": "arch",
"model": "Arch-Function",
}
)
config_yaml["model_providers"] = deepcopy(updated_model_providers)
listeners_with_provider = 0

View file

@ -1,13 +1,5 @@
import os
KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
KATANEMO_LOCAL_MODEL_LIST = [
"katanemo/Arch-Guard",
]
SERVICE_NAME_ARCHGW = "archgw"
SERVICE_NAME_MODEL_SERVER = "model_server"
SERVICE_ALL = "all"
MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
ARCHGW_DOCKER_NAME = "archgw"
ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.18")
ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.22")

View file

@ -9,9 +9,7 @@ from cli.utils import convert_legacy_listeners, getLogger
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
KATANEMO_LOCAL_MODEL_LIST,
)
from huggingface_hub import snapshot_download
import subprocess
from cli.docker_cli import (
docker_container_status,
@ -144,49 +142,6 @@ def stop_docker_container(service=ARCHGW_DOCKER_NAME):
log.info(f"Failed to shut down services: {str(e)}")
def download_models_from_hf():
for model in KATANEMO_LOCAL_MODEL_LIST:
log.info(f"Downloading model: {model}")
snapshot_download(repo_id=model)
def start_arch_modelserver(foreground):
"""
Start the model server. This assumes that the archgw_modelserver package is installed locally
"""
try:
log.info("archgw_modelserver restart")
if foreground:
subprocess.run(
["archgw_modelserver", "start", "--foreground"],
check=True,
)
else:
subprocess.run(
["archgw_modelserver", "start"],
check=True,
)
except subprocess.CalledProcessError as e:
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
sys.exit(1)
def stop_arch_modelserver():
"""
Stop the model server. This assumes that the archgw_modelserver package is installed locally
"""
try:
subprocess.run(
["archgw_modelserver", "stop"],
check=True,
)
except subprocess.CalledProcessError as e:
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
sys.exit(1)
def start_cli_agent(arch_config_file=None, settings_json="{}"):
"""Start a CLI client connected to Arch."""

View file

@ -20,20 +20,14 @@ from cli.utils import (
find_config_file,
)
from cli.core import (
start_arch_modelserver,
stop_arch_modelserver,
start_arch,
stop_docker_container,
download_models_from_hf,
start_cli_agent,
)
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
KATANEMO_DOCKERHUB_REPO,
SERVICE_NAME_ARCHGW,
SERVICE_NAME_MODEL_SERVER,
SERVICE_ALL,
)
log = getLogger(__name__)
@ -47,9 +41,8 @@ logo = r"""
"""
# Command to build archgw and model_server Docker images
# Command to build archgw Docker images
ARCHGW_DOCKERFILE = "./arch/Dockerfile"
MODEL_SERVER_BUILD_FILE = "./model_server/pyproject.toml"
def get_version():
@ -60,18 +53,6 @@ def get_version():
return "version not found"
def verify_service_name(service):
"""Verify if the service name is valid."""
if service not in [
SERVICE_NAME_ARCHGW,
SERVICE_NAME_MODEL_SERVER,
SERVICE_ALL,
]:
print(f"Error: Invalid service {service}. Exiting")
sys.exit(1)
return True
@click.group(invoke_without_command=True)
@click.option("--version", is_flag=True, help="Show the archgw cli version and exit.")
@click.pass_context
@ -89,17 +70,11 @@ def main(ctx, version):
@click.command()
@click.option(
"--service",
default=SERVICE_ALL,
help="Optional parameter to specify which service to build. Options are model_server, archgw",
)
def build(service):
def build():
"""Build Arch from source. Must be in root of cloned repo."""
verify_service_name(service)
# Check if /arch/Dockerfile exists
if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
if os.path.exists(ARCHGW_DOCKERFILE):
if os.path.exists(ARCHGW_DOCKERFILE):
click.echo("Building archgw image...")
try:
@ -110,8 +85,6 @@ def build(service):
"-f",
ARCHGW_DOCKERFILE,
"-t",
f"{KATANEMO_DOCKERHUB_REPO}:latest",
"-t",
f"{ARCHGW_DOCKER_IMAGE}",
".",
"--add-host=host.docker.internal:host-gateway",
@ -128,57 +101,20 @@ def build(service):
click.echo("archgw image built successfully.")
"""Install the model server dependencies using Poetry."""
if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
# Check if pyproject.toml exists
if os.path.exists(MODEL_SERVER_BUILD_FILE):
click.echo("Installing model server dependencies with Poetry...")
try:
subprocess.run(
["poetry", "install", "--no-cache"],
cwd=os.path.dirname(MODEL_SERVER_BUILD_FILE),
check=True,
)
click.echo("Model server dependencies installed successfully.")
except subprocess.CalledProcessError as e:
click.echo(f"Error installing model server dependencies: {e}")
sys.exit(1)
else:
click.echo(f"Error: pyproject.toml not found in {MODEL_SERVER_BUILD_FILE}")
sys.exit(1)
@click.command()
@click.argument("file", required=False) # Optional file argument
@click.option(
"--path", default=".", help="Path to the directory containing arch_config.yaml"
)
@click.option(
"--service",
default=SERVICE_ALL,
help="Service to start. Options are model_server, archgw.",
)
@click.option(
"--foreground",
default=False,
help="Run Arch in the foreground. Default is False",
is_flag=True,
)
def up(file, path, service, foreground):
def up(file, path, foreground):
"""Starts Arch."""
verify_service_name(service)
if service == SERVICE_ALL and foreground:
# foreground can only be specified when starting individual services
log.info("foreground flag is only supported for individual services. Exiting.")
sys.exit(1)
if service == SERVICE_NAME_MODEL_SERVER:
log.info("Download models from HuggingFace...")
download_models_from_hf()
start_arch_modelserver(foreground)
return
# Use the utility function to find config file
arch_config_file = find_config_file(path, file)
@ -202,7 +138,6 @@ def up(file, path, service, foreground):
# Set the ARCH_CONFIG_FILE environment variable
env_stage = {
"OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
"MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
}
env = os.environ.copy()
# Remove PATH variable if present
@ -242,40 +177,13 @@ def up(file, path, service, foreground):
env_stage[access_key] = env_file_dict[access_key]
env.update(env_stage)
if service == SERVICE_NAME_ARCHGW:
start_arch(arch_config_file, env, foreground=foreground)
else:
# Check if ingress_traffic listener is configured before starting model_server
if has_ingress_listener(arch_config_file):
download_models_from_hf()
start_arch_modelserver(foreground)
else:
log.info(
"Skipping model_server startup: no ingress_traffic listener configured in arch_config.yaml"
)
start_arch(arch_config_file, env, foreground=foreground)
start_arch(arch_config_file, env, foreground=foreground)
@click.command()
@click.option(
"--service",
default=SERVICE_ALL,
help="Service to down. Options are all, model_server, archgw. Default is all",
)
def down(service):
def down():
"""Stops Arch."""
verify_service_name(service)
if service == SERVICE_NAME_MODEL_SERVER:
stop_arch_modelserver()
elif service == SERVICE_NAME_ARCHGW:
stop_docker_container()
else:
stop_arch_modelserver()
stop_docker_container(SERVICE_NAME_ARCHGW)
stop_docker_container()
@click.command()
@ -303,7 +211,7 @@ def generate_prompt_targets(file):
@click.command()
@click.option(
"--debug",
help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
help="For detailed debug logs to trace calls from archgw <> api_server, etc",
is_flag=True,
)
@click.option("--follow", help="Follow the logs", is_flag=True)

View file

@ -57,6 +57,10 @@ def convert_legacy_listeners(
"timeout": "30s",
}
# Handle None case
if listeners is None:
return [llm_gateway_listener], llm_gateway_listener, prompt_gateway_listener
if isinstance(listeners, dict):
# legacy listeners
# check if type is array or object
@ -148,6 +152,24 @@ def get_llm_provider_access_keys(arch_config_file):
if access_key is not None:
access_key_list.append(access_key)
# Extract environment variables from state_storage.connection_string
state_storage = arch_config_yaml.get("state_storage_v1_responses")
if state_storage:
connection_string = state_storage.get("connection_string")
if connection_string and isinstance(connection_string, str):
# Extract all $VAR and ${VAR} patterns from connection string
import re
# Match both $VAR and ${VAR} patterns
pattern = r"\$\{?([A-Z_][A-Z0-9_]*)\}?"
matches = re.findall(pattern, connection_string)
for var in matches:
access_key_list.append(f"${var}")
else:
raise ValueError(
"Invalid connection string received in state_storage_v1_responses"
)
return access_key_list

2481
arch/tools/poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,34 +1,28 @@
[project]
name = "archgw"
version = "0.3.18"
description = "Python-based CLI tool to manage Arch Gateway."
authors = [{ name = "Katanemo Labs, Inc." }]
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"archgw_modelserver==0.3.18",
"click>=8.1.7,<9.0.0",
"jinja2>=3.1.4,<4.0.0",
"jsonschema>=4.23.0,<5.0.0",
"pyyaml>=6.0.2,<7.0.0",
]
[project.scripts]
archgw = "cli.main:main"
[dependency-groups]
dev = [
"pytest>=8.4.1,<9.0.0",
]
[tool.poetry]
name = "archgw"
version = "0.3.22"
description = "Python-based CLI tool to manage Arch Gateway."
authors = ["Katanemo Labs, Inc."]
readme = "README.md"
packages = [{ include = "cli" }]
dependencies = { archgw_modelserver = { path = "../../model_server", develop = true } }
[tool.poetry.dependencies]
python = ">=3.10"
click = ">=8.1.7,<9.0.0"
jinja2 = ">=3.1.4,<4.0.0"
jsonschema = ">=4.23.0,<5.0.0"
pyyaml = ">=6.0.2,<7.0.0"
requests = ">=2.31.0,<3.0.0"
[tool.poetry.group.dev.dependencies]
pytest = ">=8.4.1,<9.0.0"
[tool.poetry.scripts]
archgw = "cli.main:main"
[build-system]
requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
addopts = ["-v"]

View file

@ -94,21 +94,16 @@ def test_validate_and_render_happy_path_agent_config(monkeypatch):
version: v0.3.0
agents:
- name: query_rewriter
kind: openai
endpoint: http://localhost:10500
- name: context_builder
kind: openai
endpoint: http://localhost:10501
- name: response_generator
kind: openai
endpoint: http://localhost:10502
- name: research_agent
kind: openai
endpoint: http://localhost:10500
- name: input_guard_rails
kind: openai
endpoint: http://localhost:10503
- id: query_rewriter
url: http://localhost:10500
- id: context_builder
url: http://localhost:10501
- id: response_generator
url: http://localhost:10502
- id: research_agent
url: http://localhost:10500
- id: input_guard_rails
url: http://localhost:10503
listeners:
- name: tmobile
@ -156,7 +151,7 @@ listeners:
mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write)
]
with mock.patch("builtins.open", m_open):
with mock.patch("config_generator.Environment"):
with mock.patch("cli.config_generator.Environment"):
validate_and_render_schema()