Merge branch 'main' into adil/add_acm_demo

This commit is contained in:
Adil Hafeez 2025-01-08 16:55:07 -08:00
commit 68097fde07
166 changed files with 9507 additions and 11803 deletions

View file

@ -99,6 +99,8 @@ properties:
type: string
in_path:
type: boolean
format:
type: string
additionalProperties: false
required:
- name

View file

@ -22,3 +22,4 @@ services:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}

View file

@ -1,19 +0,0 @@
services:
archgw:
image: katanemo/archgw:latest
ports:
- "10000:10000"
- "10001:10001"
- "11000:11000"
- "12000:12000"
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ~/archgw_logs:/var/log/
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces

View file

@ -1,6 +1,32 @@
admin:
address:
socket_address: { address: 0.0.0.0, port_value: 9901 }
stats_config:
histogram_bucket_settings:
match:
exact: "wasmcustom.time_to_first_token"
buckets:
- 100
- 500
- 800
- 1000
- 1200
- 1400
- 1600
- 1800
- 2000
- 2200
- 2400
- 3000
- 3500
- 4000
- 4500
- 5000
- 6000
- 10000
- 60000
- 180000
static_resources:
listeners:
- name: arch_listener_http
@ -211,8 +237,7 @@ static_resources:
domains:
- "*"
routes:
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
{% for internal_clustrer in ["arch_fc", "model_server"] %}
- match:
prefix: "/"
headers:
@ -225,16 +250,16 @@ static_resources:
timeout: 60s
{% endfor %}
{% for _, cluster in arch_clusters.items() %}
{% for cluster_name, cluster in arch_clusters.items() %}
- match:
prefix: "/"
headers:
- name: "x-arch-upstream"
string_match:
exact: {{ cluster.name }}
exact: {{ cluster_name }}
route:
auto_host_rewrite: true
cluster: {{ cluster.name }}
cluster: {{ cluster_name }}
timeout: 60s
{% endfor %}
http_filters:
@ -449,7 +474,7 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
{% for internal_clustrer in ["arch_fc", "model_server"] %}
- name: {{ internal_clustrer }}
connect_timeout: 5s
type: STRICT_DNS
@ -463,7 +488,7 @@ static_resources:
address:
socket_address:
address: host.docker.internal
port_value: 51000
port_value: $MODEL_SERVER_PORT
hostname: {{ internal_clustrer }}
{% endfor %}
- name: mistral_7b_instruct
@ -481,8 +506,8 @@ static_resources:
address: mistral_7b_instruct
port_value: 10001
hostname: "mistral_7b_instruct"
{% for _, cluster in arch_clusters.items() %}
- name: {{ cluster.name }}
{% for cluster_name, cluster in arch_clusters.items() %}
- name: {{ cluster_name }}
{% if cluster.connect_timeout -%}
connect_timeout: {{ cluster.connect_timeout }}
{% else -%}
@ -492,7 +517,7 @@ static_resources:
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: {{ cluster.name }}
cluster_name: {{ cluster_name }}
endpoints:
- lb_endpoints:
- endpoint:

View file

@ -19,7 +19,7 @@ source venv/bin/activate
### Step 3: Run the build script
```bash
pip install archgw==0.1.6
pip install archgw==0.1.7
```
## Uninstall Instructions: archgw CLI

View file

@ -1,3 +1,4 @@
import json
import os
from jinja2 import Environment, FileSystemLoader
import yaml
@ -47,32 +48,27 @@ def validate_and_render_schema():
config_schema_yaml = yaml.safe_load(arch_config_schema)
inferred_clusters = {}
endpoints = config_yaml.get("endpoints", {})
# override the inferred clusters with the ones defined in the config
for name, endpoint_details in endpoints.items():
inferred_clusters[name] = endpoint_details
endpoint = inferred_clusters[name]["endpoint"]
if len(endpoint.split(":")) > 1:
inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))
if "prompt_targets" in config_yaml:
for prompt_target in config_yaml["prompt_targets"]:
name = prompt_target.get("endpoint", {}).get("name", None)
if not name:
continue
if name not in inferred_clusters:
inferred_clusters[name] = {
"name": name,
"port": 80, # default port
}
endpoints = config_yaml.get("endpoints", {})
# override the inferred clusters with the ones defined in the config
for name, endpoint_details in endpoints.items():
if name in inferred_clusters:
print("updating cluster", endpoint_details)
inferred_clusters[name].update(endpoint_details)
endpoint = inferred_clusters[name]["endpoint"]
if len(endpoint.split(":")) > 1:
inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
else:
inferred_clusters[name] = endpoint_details
print("updated clusters", inferred_clusters)
raise Exception(
f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file"
)
arch_llm_providers = config_yaml["llm_providers"]
arch_tracing = config_yaml.get("tracing", {})
@ -90,6 +86,7 @@ def validate_and_render_schema():
rendered = template.render(data)
print(ENVOY_CONFIG_FILE_RENDERED)
print(rendered)
with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
file.write(rendered)

View file

@ -1,8 +1,6 @@
KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
KATANEMO_LOCAL_MODEL_LIST = [
"katanemo/Arch-Guard-cpu",
"katanemo/Arch-Guard",
"katanemo/bge-large-en-v1.5",
]
SERVICE_NAME_ARCHGW = "archgw"
SERVICE_NAME_MODEL_SERVER = "model_server"

View file

@ -4,7 +4,8 @@ import time
import sys
import glob
import docker
from cli.utils import getLogger
from docker.errors import DockerException
from cli.utils import getLogger, update_docker_host_env
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
@ -44,6 +45,7 @@ def start_archgw_docker(client, arch_config_file, env):
},
environment={
"OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
"MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
**env,
},
extra_hosts={"host.docker.internal": "host-gateway"},
@ -78,25 +80,6 @@ def stream_gateway_logs(follow):
log.info(f"Failed to stream logs: {str(e)}")
def stream_model_server_logs(follow):
"""
Get the model server logs, check if the user wants to follow/tail them.
"""
log_file_expanded = os.path.expanduser(MODEL_SERVER_LOG_FILE)
stream_command = ["tail"]
if follow:
stream_command.append("-f")
stream_command.append(log_file_expanded)
subprocess.run(
stream_command,
check=True,
stdout=sys.stdout,
stderr=sys.stderr,
)
def stream_access_logs(follow):
"""
Get the archgw access logs
@ -117,7 +100,7 @@ def stream_access_logs(follow):
)
def start_arch(arch_config_file, env, log_timeout=120):
def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
"""
Start Docker Compose in detached mode and stream logs until services are healthy.
@ -128,7 +111,22 @@ def start_arch(arch_config_file, env, log_timeout=120):
log.info("Starting arch gateway")
try:
client = docker.from_env()
try:
client = docker.from_env()
except DockerException as e:
# try setting up the docker host environment variable and retry
update_docker_host_env()
client = docker.from_env()
try:
container = client.containers.get("archgw")
log.info("archgw container found in docker, stopping and removing it")
# ensure that previous docker container is stopped and removed
container.stop()
container.remove()
log.info("Stopped and removed archgw container")
except docker.errors.NotFound as e:
pass
container = start_archgw_docker(client, arch_config_file, env)
@ -153,6 +151,13 @@ def start_arch(arch_config_file, env, log_timeout=120):
log.info(f"Container health status: {container_status}")
time.sleep(1)
if foreground:
for line in container.logs(stream=True):
print(line.decode("utf-8").strip("\n"))
except KeyboardInterrupt:
log.info("Keyboard interrupt received, stopping arch gateway service.")
stop_arch()
except docker.errors.APIError as e:
log.info(f"Failed to start Arch: {str(e)}")
@ -186,17 +191,23 @@ def download_models_from_hf():
snapshot_download(repo_id=model)
def start_arch_modelserver():
def start_arch_modelserver(foreground):
"""
Start the model server. This assumes that the archgw_modelserver package is installed locally
"""
try:
log.info("archgw_modelserver restart")
subprocess.run(
["archgw_modelserver", "restart"], check=True, start_new_session=True
)
log.info("Successfully ran model_server")
if foreground:
subprocess.run(
["archgw_modelserver", "start", "--foreground"],
check=True,
)
else:
subprocess.run(
["archgw_modelserver", "start"],
check=True,
)
except subprocess.CalledProcessError as e:
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
sys.exit(1)
@ -212,7 +223,6 @@ def stop_arch_modelserver():
["archgw_modelserver", "stop"],
check=True,
)
log.info("Successfully stopped the archgw model_server")
except subprocess.CalledProcessError as e:
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
sys.exit(1)

View file

@ -16,10 +16,9 @@ from cli.core import (
stop_arch_modelserver,
start_arch,
stop_arch,
stream_gateway_logs,
stream_model_server_logs,
stream_access_logs,
download_models_from_hf,
stream_access_logs,
stream_gateway_logs,
)
from cli.consts import (
KATANEMO_DOCKERHUB_REPO,
@ -138,16 +137,27 @@ def build(service):
default=SERVICE_ALL,
help="Service to start. Options are model_server, archgw.",
)
def up(file, path, service):
@click.option(
"--foreground",
default=False,
help="Run Arch in the foreground. Default is False",
is_flag=True,
)
def up(file, path, service, foreground):
"""Starts Arch."""
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
log.info(f"Error: Invalid service {service}. Exiting")
sys.exit(1)
if service == SERVICE_ALL and foreground:
# foreground can only be specified when starting individual services
log.info("foreground flag is only supported for individual services. Exiting.")
sys.exit(1)
if service == SERVICE_NAME_MODEL_SERVER:
log.info("Download archgw models from HuggingFace...")
download_models_from_hf()
start_arch_modelserver()
start_arch_modelserver(foreground)
return
if file:
@ -214,12 +224,11 @@ def up(file, path, service):
env.update(env_stage)
if service == SERVICE_NAME_ARCHGW:
start_arch(arch_config_file, env)
start_arch(arch_config_file, env, foreground=foreground)
else:
# this will used the cached versions of the models, so its safe to use everytime.
download_models_from_hf()
start_arch_modelserver()
start_arch(arch_config_file, env)
start_arch_modelserver(foreground)
start_arch(arch_config_file, env, foreground=foreground)
@click.command()
@ -267,65 +276,37 @@ def generate_prompt_targets(file):
@click.command()
@click.option(
"--service",
default=SERVICE_ALL,
help="Service to monitor. By default it will monitor both core gateway and model_server logs.",
)
@click.option(
"--debug",
help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
is_flag=True,
)
@click.option("--follow", help="Follow the logs", is_flag=True)
def logs(service, debug, follow):
def logs(debug, follow):
"""Stream logs from access logs services."""
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
print(f"Error: Invalid service {service}. Exiting")
sys.exit(1)
if debug:
try:
archgw_process = None
if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
archgw_process = multiprocessing.Process(
target=stream_gateway_logs, args=(follow,)
)
archgw_process.start()
model_server_process = None
if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
model_server_process = multiprocessing.Process(
target=stream_model_server_logs, args=(follow,)
)
model_server_process.start()
if archgw_process:
archgw_process.join()
if model_server_process:
model_server_process.join()
except KeyboardInterrupt:
log.info("KeyboardInterrupt detected. Exiting.")
if archgw_process and archgw_process.is_alive():
archgw_process.terminate()
if model_server_process and model_server_process.is_alive():
model_server_process.terminate()
else:
try:
archgw_access_logs_process = None
archgw_access_logs_process = multiprocessing.Process(
target=stream_access_logs, args=(follow,)
archgw_process = None
try:
if debug:
archgw_process = multiprocessing.Process(
target=stream_gateway_logs, args=(follow,)
)
archgw_access_logs_process.start()
archgw_process.start()
if archgw_access_logs_process:
archgw_access_logs_process.join()
except KeyboardInterrupt:
log.info("KeyboardInterrupt detected. Exiting.")
if archgw_access_logs_process.is_alive():
archgw_access_logs_process.terminate()
archgw_access_logs_process = multiprocessing.Process(
target=stream_access_logs, args=(follow,)
)
archgw_access_logs_process.start()
archgw_access_logs_process.join()
if archgw_process:
archgw_process.join()
except KeyboardInterrupt:
log.info("KeyboardInterrupt detected. Exiting.")
if archgw_access_logs_process.is_alive():
archgw_access_logs_process.terminate()
if archgw_process and archgw_process.is_alive():
archgw_process.terminate()
main.add_command(up)

View file

@ -1,6 +1,8 @@
import os
import yaml
import logging
import docker
from docker.errors import DockerException
from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
@ -19,10 +21,32 @@ def getLogger(name="cli"):
log = getLogger(__name__)
def update_docker_host_env():
"""
Update DOCKER_HOST environment variable to use the local Docker socket
"""
if os.getenv("DOCKER_HOST"):
return
default_docker_socket = os.getenv("DEFAULT_DOCKER_SOCKET", "/var/run/docker.sock")
if not os.path.exists(default_docker_socket):
home_dir = os.getenv("HOME")
docker_host = f"unix://{home_dir}/.docker/run/docker.sock"
log.info(
f"Default docker socket {default_docker_socket} not found, using {docker_host}"
)
os.environ["DOCKER_HOST"] = docker_host
def validate_schema(arch_config_file: str) -> None:
try:
client = docker.from_env()
# Run the container with detach=True to avoid blocking main process
try:
client = docker.from_env()
except DockerException as e:
# try setting up the docker host environment variable and retry
update_docker_host_env()
client = docker.from_env()
container = client.containers.run(
image=ARCHGW_DOCKER_IMAGE,
volumes={

3691
arch/tools/poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "archgw"
version = "0.1.6"
version = "0.1.7"
description = "Python-based CLI tool to manage Arch Gateway."
authors = ["Katanemo Labs, Inc."]
packages = [
@ -9,8 +9,8 @@ packages = [
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.12"
archgw_modelserver = "0.1.6"
python = "^3.12"
archgw_modelserver = "0.1.7"
pyyaml = "^6.0.2"
pydantic = "^2.10.1"
click = "^8.1.7"