Merge branch 'main' into adil/add_acm_demo

2026-06-17 15:25:17 +02:00 · 2025-01-08 16:55:07 -08:00 · 2025-01-08 16:55:07 -08:00 · 68097fde07
commit 68097fde07
parent 285a66fdb6 dae6239b81
166 changed files with 9507 additions and 11803 deletions
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -99,6 +99,8 @@ properties:
                  type: string
              in_path:
                type: boolean
+              format:
+                type: string
            additionalProperties: false
            required:
              - name
--- a/arch/docker-compose.dev.yaml
+++ b/arch/docker-compose.dev.yaml
@ -22,3 +22,4 @@ services:
      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
+      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}
--- a/arch/docker-compose.e2e.yaml
+++ b/arch/docker-compose.e2e.yaml
@ -1,19 +0,0 @@
-services:
-  archgw:
-    image: katanemo/archgw:latest
-    ports:
-      - "10000:10000"
-      - "10001:10001"
-      - "11000:11000"
-      - "12000:12000"
-      - "19901:9901"
-    volumes:
-      - ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-      - ~/archgw_logs:/var/log/
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
-      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
-      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -1,6 +1,32 @@
 admin:
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }
+
+stats_config:
+  histogram_bucket_settings:
+    match:
+      exact: "wasmcustom.time_to_first_token"
+    buckets:
+      - 100
+      - 500
+      - 800
+      - 1000
+      - 1200
+      - 1400
+      - 1600
+      - 1800
+      - 2000
+      - 2200
+      - 2400
+      - 3000
+      - 3500
+      - 4000
+      - 4500
+      - 5000
+      - 6000
+      - 10000
+      - 60000
+      - 180000
 static_resources:
  listeners:
    - name: arch_listener_http
@ -211,8 +237,7 @@ static_resources:
                      domains:
                        - "*"
                      routes:
-
-                        {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
+                        {% for internal_clustrer in ["arch_fc", "model_server"] %}
                        - match:
                            prefix: "/"
                            headers:
@ -225,16 +250,16 @@ static_resources:
                            timeout: 60s
                        {% endfor %}

-                        {% for _, cluster in arch_clusters.items() %}
+                        {% for cluster_name, cluster in arch_clusters.items() %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
-                                  exact: {{ cluster.name }}
+                                  exact: {{ cluster_name }}
                          route:
                            auto_host_rewrite: true
-                            cluster: {{ cluster.name }}
+                            cluster: {{ cluster_name }}
                            timeout: 60s
                        {% endfor %}
                http_filters:
@ -449,7 +474,7 @@ static_resources:
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
-    {% for internal_clustrer in ["embeddings", "zeroshot", "guard", "arch_fc", "hallucination"] %}
+    {% for internal_clustrer in ["arch_fc", "model_server"] %}
    - name: {{ internal_clustrer }}
      connect_timeout: 5s
      type: STRICT_DNS
@ -463,7 +488,7 @@ static_resources:
                  address:
                    socket_address:
                      address: host.docker.internal
-                      port_value: 51000
+                      port_value: $MODEL_SERVER_PORT
                  hostname: {{ internal_clustrer }}
    {% endfor %}
    - name: mistral_7b_instruct
@ -481,8 +506,8 @@ static_resources:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"
-{% for _, cluster in arch_clusters.items() %}
-    - name: {{ cluster.name }}
+{% for cluster_name, cluster in arch_clusters.items() %}
+    - name: {{ cluster_name }}
      {% if cluster.connect_timeout -%}
      connect_timeout: {{ cluster.connect_timeout }}
      {% else -%}
@ -492,7 +517,7 @@ static_resources:
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
-        cluster_name: {{ cluster.name }}
+        cluster_name: {{ cluster_name }}
        endpoints:
          - lb_endpoints:
              - endpoint:
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@ -19,7 +19,7 @@ source venv/bin/activate

 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.1.6
+pip install archgw==0.1.7
 ```

 ## Uninstall Instructions: archgw CLI
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -1,3 +1,4 @@
+import json
 import os
 from jinja2 import Environment, FileSystemLoader
 import yaml
@ -47,32 +48,27 @@ def validate_and_render_schema():
    config_schema_yaml = yaml.safe_load(arch_config_schema)
    inferred_clusters = {}

+    endpoints = config_yaml.get("endpoints", {})
+
+    # override the inferred clusters with the ones defined in the config
+    for name, endpoint_details in endpoints.items():
+        inferred_clusters[name] = endpoint_details
+        endpoint = inferred_clusters[name]["endpoint"]
+        if len(endpoint.split(":")) > 1:
+            inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
+            inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
+
+    print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))
+
    if "prompt_targets" in config_yaml:
        for prompt_target in config_yaml["prompt_targets"]:
            name = prompt_target.get("endpoint", {}).get("name", None)
            if not name:
                continue
            if name not in inferred_clusters:
-                inferred_clusters[name] = {
-                    "name": name,
-                    "port": 80,  # default port
-                }
-
-    endpoints = config_yaml.get("endpoints", {})
-
-    # override the inferred clusters with the ones defined in the config
-    for name, endpoint_details in endpoints.items():
-        if name in inferred_clusters:
-            print("updating cluster", endpoint_details)
-            inferred_clusters[name].update(endpoint_details)
-            endpoint = inferred_clusters[name]["endpoint"]
-            if len(endpoint.split(":")) > 1:
-                inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
-                inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
-        else:
-            inferred_clusters[name] = endpoint_details
-
-    print("updated clusters", inferred_clusters)
+                raise Exception(
+                    f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file"
+                )

    arch_llm_providers = config_yaml["llm_providers"]
    arch_tracing = config_yaml.get("tracing", {})
@ -90,6 +86,7 @@ def validate_and_render_schema():

    rendered = template.render(data)
    print(ENVOY_CONFIG_FILE_RENDERED)
+    print(rendered)
    with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
        file.write(rendered)

--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@ -1,8 +1,6 @@
 KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
 KATANEMO_LOCAL_MODEL_LIST = [
-    "katanemo/Arch-Guard-cpu",
    "katanemo/Arch-Guard",
-    "katanemo/bge-large-en-v1.5",
 ]
 SERVICE_NAME_ARCHGW = "archgw"
 SERVICE_NAME_MODEL_SERVER = "model_server"
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -4,7 +4,8 @@ import time
 import sys
 import glob
 import docker
-from cli.utils import getLogger
+from docker.errors import DockerException
+from cli.utils import getLogger, update_docker_host_env
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
@ -44,6 +45,7 @@ def start_archgw_docker(client, arch_config_file, env):
        },
        environment={
            "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
+            "MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
            **env,
        },
        extra_hosts={"host.docker.internal": "host-gateway"},
@ -78,25 +80,6 @@ def stream_gateway_logs(follow):
        log.info(f"Failed to stream logs: {str(e)}")


-def stream_model_server_logs(follow):
-    """
-    Get the model server logs, check if the user wants to follow/tail them.
-    """
-    log_file_expanded = os.path.expanduser(MODEL_SERVER_LOG_FILE)
-
-    stream_command = ["tail"]
-    if follow:
-        stream_command.append("-f")
-
-    stream_command.append(log_file_expanded)
-    subprocess.run(
-        stream_command,
-        check=True,
-        stdout=sys.stdout,
-        stderr=sys.stderr,
-    )
-
-
 def stream_access_logs(follow):
    """
    Get the archgw access logs
@ -117,7 +100,7 @@ def stream_access_logs(follow):
    )


-def start_arch(arch_config_file, env, log_timeout=120):
+def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
    """
    Start Docker Compose in detached mode and stream logs until services are healthy.

@ -128,7 +111,22 @@ def start_arch(arch_config_file, env, log_timeout=120):
    log.info("Starting arch gateway")

    try:
-        client = docker.from_env()
+        try:
+            client = docker.from_env()
+        except DockerException as e:
+            # try setting up the docker host environment variable and retry
+            update_docker_host_env()
+            client = docker.from_env()
+
+        try:
+            container = client.containers.get("archgw")
+            log.info("archgw container found in docker, stopping and removing it")
+            # ensure that previous docker container is stopped and removed
+            container.stop()
+            container.remove()
+            log.info("Stopped and removed archgw container")
+        except docker.errors.NotFound as e:
+            pass

        container = start_archgw_docker(client, arch_config_file, env)

@ -153,6 +151,13 @@ def start_arch(arch_config_file, env, log_timeout=120):
                log.info(f"Container health status: {container_status}")
                time.sleep(1)

+        if foreground:
+            for line in container.logs(stream=True):
+                print(line.decode("utf-8").strip("\n"))
+
+    except KeyboardInterrupt:
+        log.info("Keyboard interrupt received, stopping arch gateway service.")
+        stop_arch()
    except docker.errors.APIError as e:
        log.info(f"Failed to start Arch: {str(e)}")

@ -186,17 +191,23 @@ def download_models_from_hf():
        snapshot_download(repo_id=model)


-def start_arch_modelserver():
+def start_arch_modelserver(foreground):
    """
    Start the model server. This assumes that the archgw_modelserver package is installed locally

    """
    try:
        log.info("archgw_modelserver restart")
-        subprocess.run(
-            ["archgw_modelserver", "restart"], check=True, start_new_session=True
-        )
-        log.info("Successfully ran model_server")
+        if foreground:
+            subprocess.run(
+                ["archgw_modelserver", "start", "--foreground"],
+                check=True,
+            )
+        else:
+            subprocess.run(
+                ["archgw_modelserver", "start"],
+                check=True,
+            )
    except subprocess.CalledProcessError as e:
        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
        sys.exit(1)
@ -212,7 +223,6 @@ def stop_arch_modelserver():
            ["archgw_modelserver", "stop"],
            check=True,
        )
-        log.info("Successfully stopped the archgw model_server")
    except subprocess.CalledProcessError as e:
        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
        sys.exit(1)
--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@ -16,10 +16,9 @@ from cli.core import (
    stop_arch_modelserver,
    start_arch,
    stop_arch,
-    stream_gateway_logs,
-    stream_model_server_logs,
-    stream_access_logs,
    download_models_from_hf,
+    stream_access_logs,
+    stream_gateway_logs,
 )
 from cli.consts import (
    KATANEMO_DOCKERHUB_REPO,
@ -138,16 +137,27 @@ def build(service):
    default=SERVICE_ALL,
    help="Service to start. Options are model_server, archgw.",
 )
-def up(file, path, service):
+@click.option(
+    "--foreground",
+    default=False,
+    help="Run Arch in the foreground. Default is False",
+    is_flag=True,
+)
+def up(file, path, service, foreground):
    """Starts Arch."""
    if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
        log.info(f"Error: Invalid service {service}. Exiting")
        sys.exit(1)

+    if service == SERVICE_ALL and foreground:
+        # foreground can only be specified when starting individual services
+        log.info("foreground flag is only supported for individual services. Exiting.")
+        sys.exit(1)
+
    if service == SERVICE_NAME_MODEL_SERVER:
        log.info("Download archgw models from HuggingFace...")
        download_models_from_hf()
-        start_arch_modelserver()
+        start_arch_modelserver(foreground)
        return

    if file:
@ -214,12 +224,11 @@ def up(file, path, service):
    env.update(env_stage)

    if service == SERVICE_NAME_ARCHGW:
-        start_arch(arch_config_file, env)
+        start_arch(arch_config_file, env, foreground=foreground)
    else:
-        # this will used the cached versions of the models, so its safe to use everytime.
        download_models_from_hf()
-        start_arch_modelserver()
-        start_arch(arch_config_file, env)
+        start_arch_modelserver(foreground)
+        start_arch(arch_config_file, env, foreground=foreground)


@click.command()
@ -267,65 +276,37 @@ def generate_prompt_targets(file):


@click.command()
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Service to monitor. By default it will monitor both core gateway and model_server logs.",
-)
@click.option(
    "--debug",
    help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
    is_flag=True,
 )
@click.option("--follow", help="Follow the logs", is_flag=True)
-def logs(service, debug, follow):
+def logs(debug, follow):
    """Stream logs from access logs services."""

-    if service not in [SERVICE_NAME_ARCHGW, SERVICE_NAME_MODEL_SERVER, SERVICE_ALL]:
-        print(f"Error: Invalid service {service}. Exiting")
-        sys.exit(1)
-
-    if debug:
-        try:
-            archgw_process = None
-            if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
-                archgw_process = multiprocessing.Process(
-                    target=stream_gateway_logs, args=(follow,)
-                )
-                archgw_process.start()
-
-            model_server_process = None
-            if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
-                model_server_process = multiprocessing.Process(
-                    target=stream_model_server_logs, args=(follow,)
-                )
-                model_server_process.start()
-
-            if archgw_process:
-                archgw_process.join()
-            if model_server_process:
-                model_server_process.join()
-        except KeyboardInterrupt:
-            log.info("KeyboardInterrupt detected. Exiting.")
-            if archgw_process and archgw_process.is_alive():
-                archgw_process.terminate()
-
-            if model_server_process and model_server_process.is_alive():
-                model_server_process.terminate()
-    else:
-        try:
-            archgw_access_logs_process = None
-            archgw_access_logs_process = multiprocessing.Process(
-                target=stream_access_logs, args=(follow,)
+    archgw_process = None
+    try:
+        if debug:
+            archgw_process = multiprocessing.Process(
+                target=stream_gateway_logs, args=(follow,)
            )
-            archgw_access_logs_process.start()
+            archgw_process.start()

-            if archgw_access_logs_process:
-                archgw_access_logs_process.join()
-        except KeyboardInterrupt:
-            log.info("KeyboardInterrupt detected. Exiting.")
-            if archgw_access_logs_process.is_alive():
-                archgw_access_logs_process.terminate()
+        archgw_access_logs_process = multiprocessing.Process(
+            target=stream_access_logs, args=(follow,)
+        )
+        archgw_access_logs_process.start()
+        archgw_access_logs_process.join()
+
+        if archgw_process:
+            archgw_process.join()
+    except KeyboardInterrupt:
+        log.info("KeyboardInterrupt detected. Exiting.")
+        if archgw_access_logs_process.is_alive():
+            archgw_access_logs_process.terminate()
+        if archgw_process and archgw_process.is_alive():
+            archgw_process.terminate()


 main.add_command(up)
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -1,6 +1,8 @@
+import os
 import yaml
 import logging
 import docker
+from docker.errors import DockerException

 from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME

@ -19,10 +21,32 @@ def getLogger(name="cli"):
 log = getLogger(__name__)


+def update_docker_host_env():
+    """
+    Update DOCKER_HOST environment variable to use the local Docker socket
+    """
+    if os.getenv("DOCKER_HOST"):
+        return
+
+    default_docker_socket = os.getenv("DEFAULT_DOCKER_SOCKET", "/var/run/docker.sock")
+    if not os.path.exists(default_docker_socket):
+        home_dir = os.getenv("HOME")
+        docker_host = f"unix://{home_dir}/.docker/run/docker.sock"
+        log.info(
+            f"Default docker socket {default_docker_socket} not found, using {docker_host}"
+        )
+        os.environ["DOCKER_HOST"] = docker_host
+
+
 def validate_schema(arch_config_file: str) -> None:
    try:
-        client = docker.from_env()
-        # Run the container with detach=True to avoid blocking main process
+        try:
+            client = docker.from_env()
+        except DockerException as e:
+            # try setting up the docker host environment variable and retry
+            update_docker_host_env()
+            client = docker.from_env()
+
        container = client.containers.run(
            image=ARCHGW_DOCKER_IMAGE,
            volumes={
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.1.6"
+version = "0.1.7"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@ -9,8 +9,8 @@ packages = [
 readme = "README.md"

 [tool.poetry.dependencies]
-python = ">=3.12"
-archgw_modelserver = "0.1.6"
+python = "^3.12"
+archgw_modelserver = "0.1.7"
 pyyaml = "^6.0.2"
 pydantic = "^2.10.1"
 click = "^8.1.7"