mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Merge branch 'main' into adil/add_acm_demo
This commit is contained in:
commit
68097fde07
166 changed files with 9507 additions and 11803 deletions
|
|
@ -99,6 +99,8 @@ properties:
|
|||
type: string
|
||||
in_path:
|
||||
type: boolean
|
||||
format:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
|
|
|
|||
|
|
@ -22,3 +22,4 @@ services:
|
|||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
services:
|
||||
archgw:
|
||||
image: katanemo/archgw:latest
|
||||
ports:
|
||||
- "10000:10000"
|
||||
- "10001:10001"
|
||||
- "11000:11000"
|
||||
- "12000:12000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ~/archgw_logs:/var/log/
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||
|
|
@ -1,6 +1,32 @@
|
|||
admin:
|
||||
address:
|
||||
socket_address: { address: 0.0.0.0, port_value: 9901 }
|
||||
|
||||
stats_config:
|
||||
histogram_bucket_settings:
|
||||
match:
|
||||
exact: "wasmcustom.time_to_first_token"
|
||||
buckets:
|
||||
- 100
|
||||
- 500
|
||||
- 800
|
||||
- 1000
|
||||
- 1200
|
||||
- 1400
|
||||
- 1600
|
||||
- 1800
|
||||
- 2000
|
||||
- 2200
|
||||
- 2400
|
||||
- 3000
|
||||
- 3500
|
||||
- 4000
|
||||
- 4500
|
||||
- 5000
|
||||
- 6000
|
||||
- 10000
|
||||
- 60000
|
||||
- 180000
|
||||
static_resources:
|
||||
listeners:
|
||||
- name: arch_listener_http
|
||||
|
|
@ -211,8 +237,7 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
|
||||
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
|
||||
{% for internal_clustrer in ["arch_fc", "model_server"] %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
|
|
@ -225,16 +250,16 @@ static_resources:
|
|||
timeout: 60s
|
||||
{% endfor %}
|
||||
|
||||
{% for _, cluster in arch_clusters.items() %}
|
||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
- name: "x-arch-upstream"
|
||||
string_match:
|
||||
exact: {{ cluster.name }}
|
||||
exact: {{ cluster_name }}
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ cluster.name }}
|
||||
cluster: {{ cluster_name }}
|
||||
timeout: 60s
|
||||
{% endfor %}
|
||||
http_filters:
|
||||
|
|
@ -449,7 +474,7 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: api.mistral.ai
|
||||
{% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
|
||||
{% for internal_clustrer in ["arch_fc", "model_server"] %}
|
||||
- name: {{ internal_clustrer }}
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
|
|
@ -463,7 +488,7 @@ static_resources:
|
|||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 51000
|
||||
port_value: $MODEL_SERVER_PORT
|
||||
hostname: {{ internal_clustrer }}
|
||||
{% endfor %}
|
||||
- name: mistral_7b_instruct
|
||||
|
|
@ -481,8 +506,8 @@ static_resources:
|
|||
address: mistral_7b_instruct
|
||||
port_value: 10001
|
||||
hostname: "mistral_7b_instruct"
|
||||
{% for _, cluster in arch_clusters.items() %}
|
||||
- name: {{ cluster.name }}
|
||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||
- name: {{ cluster_name }}
|
||||
{% if cluster.connect_timeout -%}
|
||||
connect_timeout: {{ cluster.connect_timeout }}
|
||||
{% else -%}
|
||||
|
|
@ -492,7 +517,7 @@ static_resources:
|
|||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: {{ cluster.name }}
|
||||
cluster_name: {{ cluster_name }}
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ source venv/bin/activate
|
|||
|
||||
### Step 3: Run the build script
|
||||
```bash
|
||||
pip install archgw==0.1.6
|
||||
pip install archgw==0.1.7
|
||||
```
|
||||
|
||||
## Uninstall Instructions: archgw CLI
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import os
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import yaml
|
||||
|
|
@ -47,32 +48,27 @@ def validate_and_render_schema():
|
|||
config_schema_yaml = yaml.safe_load(arch_config_schema)
|
||||
inferred_clusters = {}
|
||||
|
||||
endpoints = config_yaml.get("endpoints", {})
|
||||
|
||||
# override the inferred clusters with the ones defined in the config
|
||||
for name, endpoint_details in endpoints.items():
|
||||
inferred_clusters[name] = endpoint_details
|
||||
endpoint = inferred_clusters[name]["endpoint"]
|
||||
if len(endpoint.split(":")) > 1:
|
||||
inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
|
||||
inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
|
||||
|
||||
print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))
|
||||
|
||||
if "prompt_targets" in config_yaml:
|
||||
for prompt_target in config_yaml["prompt_targets"]:
|
||||
name = prompt_target.get("endpoint", {}).get("name", None)
|
||||
if not name:
|
||||
continue
|
||||
if name not in inferred_clusters:
|
||||
inferred_clusters[name] = {
|
||||
"name": name,
|
||||
"port": 80, # default port
|
||||
}
|
||||
|
||||
endpoints = config_yaml.get("endpoints", {})
|
||||
|
||||
# override the inferred clusters with the ones defined in the config
|
||||
for name, endpoint_details in endpoints.items():
|
||||
if name in inferred_clusters:
|
||||
print("updating cluster", endpoint_details)
|
||||
inferred_clusters[name].update(endpoint_details)
|
||||
endpoint = inferred_clusters[name]["endpoint"]
|
||||
if len(endpoint.split(":")) > 1:
|
||||
inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
|
||||
inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
|
||||
else:
|
||||
inferred_clusters[name] = endpoint_details
|
||||
|
||||
print("updated clusters", inferred_clusters)
|
||||
raise Exception(
|
||||
f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file"
|
||||
)
|
||||
|
||||
arch_llm_providers = config_yaml["llm_providers"]
|
||||
arch_tracing = config_yaml.get("tracing", {})
|
||||
|
|
@ -90,6 +86,7 @@ def validate_and_render_schema():
|
|||
|
||||
rendered = template.render(data)
|
||||
print(ENVOY_CONFIG_FILE_RENDERED)
|
||||
print(rendered)
|
||||
with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
|
||||
file.write(rendered)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
|
||||
KATANEMO_LOCAL_MODEL_LIST = [
|
||||
"katanemo/Arch-Guard-cpu",
|
||||
"katanemo/Arch-Guard",
|
||||
"katanemo/bge-large-en-v1.5",
|
||||
]
|
||||
SERVICE_NAME_ARCHGW = "archgw"
|
||||
SERVICE_NAME_MODEL_SERVER = "model_server"
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ import time
|
|||
import sys
|
||||
import glob
|
||||
import docker
|
||||
from cli.utils import getLogger
|
||||
from docker.errors import DockerException
|
||||
from cli.utils import getLogger, update_docker_host_env
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
|
|
@ -44,6 +45,7 @@ def start_archgw_docker(client, arch_config_file, env):
|
|||
},
|
||||
environment={
|
||||
"OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
|
||||
"MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
|
||||
**env,
|
||||
},
|
||||
extra_hosts={"host.docker.internal": "host-gateway"},
|
||||
|
|
@ -78,25 +80,6 @@ def stream_gateway_logs(follow):
|
|||
log.info(f"Failed to stream logs: {str(e)}")
|
||||
|
||||
|
||||
def stream_model_server_logs(follow):
|
||||
"""
|
||||
Get the model server logs, check if the user wants to follow/tail them.
|
||||
"""
|
||||
log_file_expanded = os.path.expanduser(MODEL_SERVER_LOG_FILE)
|
||||
|
||||
stream_command = ["tail"]
|
||||
if follow:
|
||||
stream_command.append("-f")
|
||||
|
||||
stream_command.append(log_file_expanded)
|
||||
subprocess.run(
|
||||
stream_command,
|
||||
check=True,
|
||||
stdout=sys.stdout,
|
||||
stderr=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def stream_access_logs(follow):
|
||||
"""
|
||||
Get the archgw access logs
|
||||
|
|
@ -117,7 +100,7 @@ def stream_access_logs(follow):
|
|||
)
|
||||
|
||||
|
||||
def start_arch(arch_config_file, env, log_timeout=120):
|
||||
def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
||||
"""
|
||||
Start Docker Compose in detached mode and stream logs until services are healthy.
|
||||
|
||||
|
|
@ -128,7 +111,22 @@ def start_arch(arch_config_file, env, log_timeout=120):
|
|||
log.info("Starting arch gateway")
|
||||
|
||||
try:
|
||||
client = docker.from_env()
|
||||
try:
|
||||
client = docker.from_env()
|
||||
except DockerException as e:
|
||||
# try setting up the docker host environment variable and retry
|
||||
update_docker_host_env()
|
||||
client = docker.from_env()
|
||||
|
||||
try:
|
||||
container = client.containers.get("archgw")
|
||||
log.info("archgw container found in docker, stopping and removing it")
|
||||
# ensure that previous docker container is stopped and removed
|
||||
container.stop()
|
||||
container.remove()
|
||||
log.info("Stopped and removed archgw container")
|
||||
except docker.errors.NotFound as e:
|
||||
pass
|
||||
|
||||
container = start_archgw_docker(client, arch_config_file, env)
|
||||
|
||||
|
|
@ -153,6 +151,13 @@ def start_arch(arch_config_file, env, log_timeout=120):
|
|||
log.info(f"Container health status: {container_status}")
|
||||
time.sleep(1)
|
||||
|
||||
if foreground:
|
||||
for line in container.logs(stream=True):
|
||||
print(line.decode("utf-8").strip("\n"))
|
||||
|
||||
except KeyboardInterrupt:
|
||||
log.info("Keyboard interrupt received, stopping arch gateway service.")
|
||||
stop_arch()
|
||||
except docker.errors.APIError as e:
|
||||
log.info(f"Failed to start Arch: {str(e)}")
|
||||
|
||||
|
|
@ -186,17 +191,23 @@ def download_models_from_hf():
|
|||
snapshot_download(repo_id=model)
|
||||
|
||||
|
||||
def start_arch_modelserver():
|
||||
def start_arch_modelserver(foreground):
|
||||
"""
|
||||
Start the model server. This assumes that the archgw_modelserver package is installed locally
|
||||
|
||||
"""
|
||||
try:
|
||||
log.info("archgw_modelserver restart")
|
||||
subprocess.run(
|
||||
["archgw_modelserver", "restart"], check=True, start_new_session=True
|
||||
)
|
||||
log.info("Successfully ran model_server")
|
||||
if foreground:
|
||||
subprocess.run(
|
||||
["archgw_modelserver", "start", "--foreground"],
|
||||
check=True,
|
||||
)
|
||||
else:
|
||||
subprocess.run(
|
||||
["archgw_modelserver", "start"],
|
||||
check=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
|
@ -212,7 +223,6 @@ def stop_arch_modelserver():
|
|||
["archgw_modelserver", "stop"],
|
||||
check=True,
|
||||
)
|
||||
log.info("Successfully stopped the archgw model_server")
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -16,10 +16,9 @@ from cli.core import (
|
|||
stop_arch_modelserver,
|
||||
start_arch,
|
||||
stop_arch,
|
||||
stream_gateway_logs,
|
||||
stream_model_server_logs,
|
||||
stream_access_logs,
|
||||
download_models_from_hf,
|
||||
stream_access_logs,
|
||||
stream_gateway_logs,
|
||||
)
|
||||
from cli.consts import (
|
||||
KATANEMO_DOCKERHUB_REPO,
|
||||
|
|
@ -138,16 +137,27 @@ def build(service):
|
|||
default=SERVICE_ALL,
|
||||
help="Service to start. Options are model_server, archgw.",
|
||||
)
|
||||
def up(file, path, service):
|
||||
@click.option(
|
||||
"--foreground",
|
||||
default=False,
|
||||
help="Run Arch in the foreground. Default is False",
|
||||
is_flag=True,
|
||||
)
|
||||
def up(file, path, service, foreground):
|
||||
"""Starts Arch."""
|
||||
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
|
||||
log.info(f"Error: Invalid service {service}. Exiting")
|
||||
sys.exit(1)
|
||||
|
||||
if service == SERVICE_ALL and foreground:
|
||||
# foreground can only be specified when starting individual services
|
||||
log.info("foreground flag is only supported for individual services. Exiting.")
|
||||
sys.exit(1)
|
||||
|
||||
if service == SERVICE_NAME_MODEL_SERVER:
|
||||
log.info("Download archgw models from HuggingFace...")
|
||||
download_models_from_hf()
|
||||
start_arch_modelserver()
|
||||
start_arch_modelserver(foreground)
|
||||
return
|
||||
|
||||
if file:
|
||||
|
|
@ -214,12 +224,11 @@ def up(file, path, service):
|
|||
env.update(env_stage)
|
||||
|
||||
if service == SERVICE_NAME_ARCHGW:
|
||||
start_arch(arch_config_file, env)
|
||||
start_arch(arch_config_file, env, foreground=foreground)
|
||||
else:
|
||||
# this will used the cached versions of the models, so its safe to use everytime.
|
||||
download_models_from_hf()
|
||||
start_arch_modelserver()
|
||||
start_arch(arch_config_file, env)
|
||||
start_arch_modelserver(foreground)
|
||||
start_arch(arch_config_file, env, foreground=foreground)
|
||||
|
||||
|
||||
@click.command()
|
||||
|
|
@ -267,65 +276,37 @@ def generate_prompt_targets(file):
|
|||
|
||||
|
||||
@click.command()
|
||||
@click.option(
|
||||
"--service",
|
||||
default=SERVICE_ALL,
|
||||
help="Service to monitor. By default it will monitor both core gateway and model_server logs.",
|
||||
)
|
||||
@click.option(
|
||||
"--debug",
|
||||
help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
|
||||
is_flag=True,
|
||||
)
|
||||
@click.option("--follow", help="Follow the logs", is_flag=True)
|
||||
def logs(service, debug, follow):
|
||||
def logs(debug, follow):
|
||||
"""Stream logs from access logs services."""
|
||||
|
||||
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
|
||||
print(f"Error: Invalid service {service}. Exiting")
|
||||
sys.exit(1)
|
||||
|
||||
if debug:
|
||||
try:
|
||||
archgw_process = None
|
||||
if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
|
||||
archgw_process = multiprocessing.Process(
|
||||
target=stream_gateway_logs, args=(follow,)
|
||||
)
|
||||
archgw_process.start()
|
||||
|
||||
model_server_process = None
|
||||
if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
|
||||
model_server_process = multiprocessing.Process(
|
||||
target=stream_model_server_logs, args=(follow,)
|
||||
)
|
||||
model_server_process.start()
|
||||
|
||||
if archgw_process:
|
||||
archgw_process.join()
|
||||
if model_server_process:
|
||||
model_server_process.join()
|
||||
except KeyboardInterrupt:
|
||||
log.info("KeyboardInterrupt detected. Exiting.")
|
||||
if archgw_process and archgw_process.is_alive():
|
||||
archgw_process.terminate()
|
||||
|
||||
if model_server_process and model_server_process.is_alive():
|
||||
model_server_process.terminate()
|
||||
else:
|
||||
try:
|
||||
archgw_access_logs_process = None
|
||||
archgw_access_logs_process = multiprocessing.Process(
|
||||
target=stream_access_logs, args=(follow,)
|
||||
archgw_process = None
|
||||
try:
|
||||
if debug:
|
||||
archgw_process = multiprocessing.Process(
|
||||
target=stream_gateway_logs, args=(follow,)
|
||||
)
|
||||
archgw_access_logs_process.start()
|
||||
archgw_process.start()
|
||||
|
||||
if archgw_access_logs_process:
|
||||
archgw_access_logs_process.join()
|
||||
except KeyboardInterrupt:
|
||||
log.info("KeyboardInterrupt detected. Exiting.")
|
||||
if archgw_access_logs_process.is_alive():
|
||||
archgw_access_logs_process.terminate()
|
||||
archgw_access_logs_process = multiprocessing.Process(
|
||||
target=stream_access_logs, args=(follow,)
|
||||
)
|
||||
archgw_access_logs_process.start()
|
||||
archgw_access_logs_process.join()
|
||||
|
||||
if archgw_process:
|
||||
archgw_process.join()
|
||||
except KeyboardInterrupt:
|
||||
log.info("KeyboardInterrupt detected. Exiting.")
|
||||
if archgw_access_logs_process.is_alive():
|
||||
archgw_access_logs_process.terminate()
|
||||
if archgw_process and archgw_process.is_alive():
|
||||
archgw_process.terminate()
|
||||
|
||||
|
||||
main.add_command(up)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import os
|
||||
import yaml
|
||||
import logging
|
||||
import docker
|
||||
from docker.errors import DockerException
|
||||
|
||||
from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
|
||||
|
||||
|
|
@ -19,10 +21,32 @@ def getLogger(name="cli"):
|
|||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def update_docker_host_env():
|
||||
"""
|
||||
Update DOCKER_HOST environment variable to use the local Docker socket
|
||||
"""
|
||||
if os.getenv("DOCKER_HOST"):
|
||||
return
|
||||
|
||||
default_docker_socket = os.getenv("DEFAULT_DOCKER_SOCKET", "/var/run/docker.sock")
|
||||
if not os.path.exists(default_docker_socket):
|
||||
home_dir = os.getenv("HOME")
|
||||
docker_host = f"unix://{home_dir}/.docker/run/docker.sock"
|
||||
log.info(
|
||||
f"Default docker socket {default_docker_socket} not found, using {docker_host}"
|
||||
)
|
||||
os.environ["DOCKER_HOST"] = docker_host
|
||||
|
||||
|
||||
def validate_schema(arch_config_file: str) -> None:
|
||||
try:
|
||||
client = docker.from_env()
|
||||
# Run the container with detach=True to avoid blocking main process
|
||||
try:
|
||||
client = docker.from_env()
|
||||
except DockerException as e:
|
||||
# try setting up the docker host environment variable and retry
|
||||
update_docker_host_env()
|
||||
client = docker.from_env()
|
||||
|
||||
container = client.containers.run(
|
||||
image=ARCHGW_DOCKER_IMAGE,
|
||||
volumes={
|
||||
|
|
|
|||
3691
arch/tools/poetry.lock
generated
3691
arch/tools/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "archgw"
|
||||
version = "0.1.6"
|
||||
version = "0.1.7"
|
||||
description = "Python-based CLI tool to manage Arch Gateway."
|
||||
authors = ["Katanemo Labs, Inc."]
|
||||
packages = [
|
||||
|
|
@ -9,8 +9,8 @@ packages = [
|
|||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.12"
|
||||
archgw_modelserver = "0.1.6"
|
||||
python = "^3.12"
|
||||
archgw_modelserver = "0.1.7"
|
||||
pyyaml = "^6.0.2"
|
||||
pydantic = "^2.10.1"
|
||||
click = "^8.1.7"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue