mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Merge branch 'main' into adil/multi_llm_support
This commit is contained in:
commit
a3352254fe
36 changed files with 2816 additions and 149 deletions
|
|
@ -1,11 +1,11 @@
|
|||
# build filter using rust toolchain
|
||||
# build docker image for arch gateway
|
||||
FROM rust:1.82.0 as builder
|
||||
RUN rustup -v target add wasm32-wasip1
|
||||
WORKDIR /arch
|
||||
COPY crates .
|
||||
|
||||
RUN cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway
|
||||
RUN cargo build --release --target wasm32-wasip1 -p brightstaff
|
||||
RUN cargo build --release -p brightstaff
|
||||
|
||||
# copy built filter into envoy image
|
||||
FROM docker.io/envoyproxy/envoy:v1.32-latest as envoy
|
||||
|
|
@ -13,20 +13,27 @@ FROM docker.io/envoyproxy/envoy:v1.32-latest as envoy
|
|||
#Build config generator, so that we have a single build image for both Rust and Python
|
||||
FROM python:3.12-slim as arch
|
||||
|
||||
RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN apt-get update && apt-get install -y supervisor gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||
COPY --from=builder /arch/target/release/brightstaff /app/brightstaff
|
||||
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
|
||||
|
||||
WORKDIR /app
|
||||
COPY arch/requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
COPY arch/tools/cli/config_generator.py .
|
||||
COPY arch/envoy.template.yaml .
|
||||
COPY arch/arch_config_schema.yaml .
|
||||
COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
RUN pip install requests
|
||||
RUN touch /var/log/envoy.log
|
||||
RUN mkdir -p /var/log/supervisor/
|
||||
RUN touch /var/log/supervisor/supervisord.log
|
||||
|
||||
ENTRYPOINT ["sh","-c", "/usr/bin/supervisord"]
|
||||
|
||||
# ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --log-level trace 2>&1 | tee /var/log/envoy.log"]
|
||||
ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info 2>&1 | tee /var/log/envoy.log"]
|
||||
# ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:info 2>&1 | tee /var/log/envoy.log"]
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ properties:
|
|||
- https
|
||||
http_host:
|
||||
type: string
|
||||
usage:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
|
|
@ -225,6 +227,12 @@ properties:
|
|||
enum:
|
||||
- llm
|
||||
- prompt
|
||||
routing:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
prompt_guards:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -328,11 +328,15 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/healthz"
|
||||
direct_response:
|
||||
status: 200
|
||||
- match:
|
||||
prefix: "/"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_listener_llm
|
||||
cluster: bright_staff
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
|
|
@ -380,12 +384,6 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/healthz"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: openai
|
||||
timeout: 60s
|
||||
{% for provider in arch_llm_providers %}
|
||||
# if endpoint is set then use custom cluster for upstream llm
|
||||
{% if provider.endpoint %}
|
||||
|
|
@ -691,6 +689,38 @@ static_resources:
|
|||
port_value: 11000
|
||||
hostname: arch_internal
|
||||
|
||||
- name: bright_staff
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: bright_staff
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 9091
|
||||
hostname: localhost
|
||||
|
||||
- name: router_model_host
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: router_model_host
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: 34.30.16.38
|
||||
port_value: 8000
|
||||
hostname: router_model_host
|
||||
|
||||
- name: arch_prompt_gateway_listener
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
|
|
|
|||
16
arch/supervisord.conf
Normal file
16
arch/supervisord.conf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
; supervisord configuration that runs two programs in one container:
; the brightstaff binary and the envoy proxy.

[supervisord]
; Keep supervisord in the foreground instead of daemonizing.
nodaemon=true

[program:brightstaff]
; Merge stderr into stdout and copy the stream to /var/log/brightstaff.log
; while still passing it through to supervisord.
; NOTE(review): the exit status seen by supervisord is that of `tee` (the last
; command in the pipeline), not of brightstaff - confirm this is acceptable.
command=sh -c "/app/brightstaff 2>&1 | tee /var/log/brightstaff.log"
stdout_logfile=/dev/stdout
redirect_stderr=true
; maxbytes=0 turns off log rotation for the captured stream.
stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0

[program:envoy]
; Startup chain: run config_generator.py, substitute environment variables
; from /etc/envoy/envoy.yaml into /etc/envoy.env_sub.yaml, then start envoy
; with the substituted file. Output is copied to /var/log/envoy.log.
command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml 2>&1 | tee /var/log/envoy.log"
stdout_logfile=/dev/stdout
redirect_stderr=true
; maxbytes=0 turns off log rotation for the captured stream.
stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0
|
||||
|
|
@ -6,6 +6,7 @@ import sys
|
|||
import yaml
|
||||
from cli.utils import getLogger
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
KATANEMO_LOCAL_MODEL_LIST,
|
||||
)
|
||||
|
|
@ -55,7 +56,9 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
path (str): The path where the prompt_config.yml file is located.
|
||||
log_timeout (int): Time in seconds to show logs before checking for healthy state.
|
||||
"""
|
||||
log.info("Starting arch gateway")
|
||||
log.info(
|
||||
f"Starting arch gateway, image name: {ARCHGW_DOCKER_NAME}, tag: {ARCHGW_DOCKER_IMAGE}"
|
||||
)
|
||||
|
||||
try:
|
||||
archgw_container_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
|
|
@ -92,10 +95,15 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
current_time = time.time()
|
||||
elapsed_time = current_time - start_time
|
||||
|
||||
if archgw_status == "exited":
|
||||
log.info("archgw container exited unexpectedly.")
|
||||
stream_gateway_logs(follow=False)
|
||||
sys.exit(1)
|
||||
|
||||
# Check if timeout is reached
|
||||
if elapsed_time > log_timeout:
|
||||
log.info(f"stopping log monitoring after {log_timeout} seconds.")
|
||||
break
|
||||
sys.exit(1)
|
||||
|
||||
if prompt_gateway_health_check_status or llm_gateway_health_check_status:
|
||||
log.info("archgw is running and is healthy!")
|
||||
|
|
@ -109,27 +117,27 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
|
||||
except KeyboardInterrupt:
|
||||
log.info("Keyboard interrupt received, stopping arch gateway service.")
|
||||
stop_arch()
|
||||
stop_docker_container()
|
||||
|
||||
|
||||
def stop_arch():
|
||||
def stop_docker_container(service=ARCHGW_DOCKER_NAME):
|
||||
"""
|
||||
Shutdown all Docker Compose services by running `docker-compose down`.
|
||||
|
||||
Args:
|
||||
path (str): The path where the docker-compose.yml file is located.
|
||||
"""
|
||||
log.info("Shutting down arch gateway service.")
|
||||
log.info(f"Shutting down {service} service.")
|
||||
|
||||
try:
|
||||
subprocess.run(
|
||||
["docker", "stop", ARCHGW_DOCKER_NAME],
|
||||
["docker", "stop", service],
|
||||
)
|
||||
subprocess.run(
|
||||
["docker", "rm", ARCHGW_DOCKER_NAME],
|
||||
["docker", "rm", service],
|
||||
)
|
||||
|
||||
log.info("Successfully shut down arch gateway service.")
|
||||
log.info(f"Successfully shut down {service} service.")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to shut down services: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -3,7 +3,10 @@ import json
|
|||
import sys
|
||||
import requests
|
||||
|
||||
from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
)
|
||||
from cli.utils import getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
|
@ -54,7 +57,6 @@ def docker_start_archgw_detached(
|
|||
port_mappings_args = [item for port in port_mappings for item in ("-p", port)]
|
||||
|
||||
volume_mappings = [
|
||||
f"{logs_path_abs}:/var/log:rw",
|
||||
f"{arch_config_file}:/app/arch_config.yaml:ro",
|
||||
# "/Users/adilhafeez/src/intelligent-prompt-gateway/crates/target/wasm32-wasip1/release:/etc/envoy/proxy-wasm-plugins:ro",
|
||||
]
|
||||
|
|
@ -90,7 +92,7 @@ def health_check_endpoint(endpoint: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def stream_gateway_logs(follow):
|
||||
def stream_gateway_logs(follow, service="archgw"):
|
||||
"""
|
||||
Stream logs from the arch gateway service.
|
||||
"""
|
||||
|
|
@ -99,7 +101,7 @@ def stream_gateway_logs(follow):
|
|||
options = ["docker", "logs"]
|
||||
if follow:
|
||||
options.append("-f")
|
||||
options.append(ARCHGW_DOCKER_NAME)
|
||||
options.append(service)
|
||||
try:
|
||||
# Run `docker-compose logs` to stream logs from the gateway service
|
||||
subprocess.run(
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from cli.core import (
|
|||
start_arch_modelserver,
|
||||
stop_arch_modelserver,
|
||||
start_arch,
|
||||
stop_arch,
|
||||
stop_docker_container,
|
||||
download_models_from_hf,
|
||||
)
|
||||
from cli.consts import (
|
||||
|
|
@ -51,6 +51,18 @@ def get_version():
|
|||
return "version not found"
|
||||
|
||||
|
||||
def verify_service_name(service):
    """Check that `service` is one of the recognised service names.

    Args:
        service: The service name to check.

    Returns:
        True when `service` equals SERVICE_NAME_ARCHGW,
        SERVICE_NAME_MODEL_SERVER or SERVICE_ALL.

    Raises:
        SystemExit: with status 1, after printing an error message, when the
            name is not recognised.
    """
    valid_services = (SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL)

    # Happy path first: a known name needs no further handling.
    if service in valid_services:
        return True

    print(f"Error: Invalid service {service}. Exiting")
    sys.exit(1)
|
||||
|
||||
|
||||
@click.group(invoke_without_command=True)
|
||||
@click.option("--version", is_flag=True, help="Show the archgw cli version and exit.")
|
||||
@click.pass_context
|
||||
|
|
@ -75,9 +87,8 @@ def main(ctx, version):
|
|||
)
|
||||
def build(service):
|
||||
"""Build Arch from source. Must be in root of cloned repo."""
|
||||
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
|
||||
print(f"Error: Invalid service {service}. Exiting")
|
||||
sys.exit(1)
|
||||
verify_service_name(service)
|
||||
|
||||
# Check if /arch/Dockerfile exists
|
||||
if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
|
||||
if os.path.exists(ARCHGW_DOCKERFILE):
|
||||
|
|
@ -146,9 +157,7 @@ def build(service):
|
|||
)
|
||||
def up(file, path, service, foreground):
|
||||
"""Starts Arch."""
|
||||
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
|
||||
log.info(f"Error: Invalid service {service}. Exiting")
|
||||
sys.exit(1)
|
||||
verify_service_name(service)
|
||||
|
||||
if service == SERVICE_ALL and foreground:
|
||||
# foreground can only be specified when starting individual services
|
||||
|
|
@ -156,7 +165,7 @@ def up(file, path, service, foreground):
|
|||
sys.exit(1)
|
||||
|
||||
if service == SERVICE_NAME_MODEL_SERVER:
|
||||
log.info("Download archgw models from HuggingFace...")
|
||||
log.info("Download models from HuggingFace...")
|
||||
download_models_from_hf()
|
||||
start_arch_modelserver(foreground)
|
||||
return
|
||||
|
|
@ -186,8 +195,6 @@ def up(file, path, service, foreground):
|
|||
log.info(f"Validation stderr: {validation_stderr}")
|
||||
sys.exit(1)
|
||||
|
||||
log.info("Starting arch model server and arch gateway")
|
||||
|
||||
# Set the ARCH_CONFIG_FILE environment variable
|
||||
env_stage = {
|
||||
"OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
|
||||
|
|
@ -210,7 +217,6 @@ def up(file, path, service, foreground):
|
|||
else:
|
||||
app_env_file = os.path.abspath(os.path.join(path, ".env"))
|
||||
|
||||
print(f"app_env_file: {app_env_file}")
|
||||
if not os.path.exists(
|
||||
app_env_file
|
||||
): # check to see if the environment variables in the current environment or not
|
||||
|
|
@ -248,17 +254,15 @@ def up(file, path, service, foreground):
|
|||
def down(service):
|
||||
"""Stops Arch."""
|
||||
|
||||
if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
|
||||
log.info(f"Error: Invalid service {service}. Exiting")
|
||||
sys.exit(1)
|
||||
verify_service_name(service)
|
||||
|
||||
if service == SERVICE_NAME_MODEL_SERVER:
|
||||
stop_arch_modelserver()
|
||||
elif service == SERVICE_NAME_ARCHGW:
|
||||
stop_arch()
|
||||
stop_docker_container()
|
||||
else:
|
||||
stop_arch_modelserver()
|
||||
stop_arch()
|
||||
stop_docker_container(SERVICE_NAME_ARCHGW)
|
||||
|
||||
|
||||
@click.command()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue