Merge branch 'main' into musa/www

2026-06-29 15:49:40 +02:00 · 2025-12-18 14:53:56 -08:00 · 2025-12-18 14:53:56 -08:00 · a6f9ca3594
commit a6f9ca3594
parent 7f6ff00d27 48bbc7cce7
189 changed files with 21252 additions and 14516 deletions
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@ -19,7 +19,7 @@ source venv/bin/activate

 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.18
+pip install archgw==0.3.22
 ```

 ## Uninstall Instructions: archgw CLI
@ -56,15 +56,8 @@ poetry install
 archgw build
 ```

-### Step 5: download models
-This will help download models so model_server can load faster. This should be done once.
-
-```bash
-archgw download-models
-```
-
 ### Logs
-`archgw` command can also view logs from gateway and model_server. Use following command to view logs,
+`archgw` command can also view logs from the gateway. Use following command to view logs,

 ```bash
 archgw logs --follow
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -13,10 +13,10 @@ SUPPORTED_PROVIDERS_WITH_BASE_URL = [
    "ollama",
    "qwen",
    "amazon_bedrock",
+    "arch",
 ]

 SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
-    "arch",
    "deepseek",
    "groq",
    "mistral",
@ -101,8 +101,17 @@ def validate_and_render_schema():

    # Process agents section and convert to endpoints
    agents = config_yaml.get("agents", [])
-    for agent in agents:
+    filters = config_yaml.get("filters", [])
+    agents_combined = agents + filters
+    agent_id_keys = set()
+
+    for agent in agents_combined:
        agent_id = agent.get("id")
+        if agent_id in agent_id_keys:
+            raise Exception(
+                f"Duplicate agent id {agent_id}, please provide unique id for each agent"
+            )
+        agent_id_keys.add(agent_id)
        agent_endpoint = agent.get("url")

        if agent_id and agent_endpoint:
@ -304,6 +313,16 @@ def validate_and_render_schema():
                }
            )

+    # Always add arch-function model provider if not already defined
+    if "arch-function" not in model_provider_name_set:
+        updated_model_providers.append(
+            {
+                "name": "arch-function",
+                "provider_interface": "arch",
+                "model": "Arch-Function",
+            }
+        )
+
    config_yaml["model_providers"] = deepcopy(updated_model_providers)

    listeners_with_provider = 0
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@ -1,13 +1,5 @@
 import os

-
-KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
-KATANEMO_LOCAL_MODEL_LIST = [
-    "katanemo/Arch-Guard",
-]
 SERVICE_NAME_ARCHGW = "archgw"
-SERVICE_NAME_MODEL_SERVER = "model_server"
-SERVICE_ALL = "all"
-MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.18")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.22")
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -9,9 +9,7 @@ from cli.utils import convert_legacy_listeners, getLogger
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
-    KATANEMO_LOCAL_MODEL_LIST,
 )
-from huggingface_hub import snapshot_download
 import subprocess
 from cli.docker_cli import (
    docker_container_status,
@ -144,49 +142,6 @@ def stop_docker_container(service=ARCHGW_DOCKER_NAME):
        log.info(f"Failed to shut down services: {str(e)}")


-def download_models_from_hf():
-    for model in KATANEMO_LOCAL_MODEL_LIST:
-        log.info(f"Downloading model: {model}")
-        snapshot_download(repo_id=model)
-
-
-def start_arch_modelserver(foreground):
-    """
-    Start the model server. This assumes that the archgw_modelserver package is installed locally
-
-    """
-    try:
-        log.info("archgw_modelserver restart")
-        if foreground:
-            subprocess.run(
-                ["archgw_modelserver", "start", "--foreground"],
-                check=True,
-            )
-        else:
-            subprocess.run(
-                ["archgw_modelserver", "start"],
-                check=True,
-            )
-    except subprocess.CalledProcessError as e:
-        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
-        sys.exit(1)
-
-
-def stop_arch_modelserver():
-    """
-    Stop the model server. This assumes that the archgw_modelserver package is installed locally
-
-    """
-    try:
-        subprocess.run(
-            ["archgw_modelserver", "stop"],
-            check=True,
-        )
-    except subprocess.CalledProcessError as e:
-        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
-        sys.exit(1)
-
-
 def start_cli_agent(arch_config_file=None, settings_json="{}"):
    """Start a CLI client connected to Arch."""

--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@ -20,20 +20,14 @@ from cli.utils import (
    find_config_file,
 )
 from cli.core import (
-    start_arch_modelserver,
-    stop_arch_modelserver,
    start_arch,
    stop_docker_container,
-    download_models_from_hf,
    start_cli_agent,
 )
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
-    KATANEMO_DOCKERHUB_REPO,
    SERVICE_NAME_ARCHGW,
-    SERVICE_NAME_MODEL_SERVER,
-    SERVICE_ALL,
 )

 log = getLogger(__name__)
@ -47,9 +41,8 @@ logo = r"""

 """

-# Command to build archgw and model_server Docker images
+# Command to build archgw Docker images
 ARCHGW_DOCKERFILE = "./arch/Dockerfile"
-MODEL_SERVER_BUILD_FILE = "./model_server/pyproject.toml"


 def get_version():
@ -60,18 +53,6 @@ def get_version():
        return "version not found"


-def verify_service_name(service):
-    """Verify if the service name is valid."""
-    if service not in [
-        SERVICE_NAME_ARCHGW,
-        SERVICE_NAME_MODEL_SERVER,
-        SERVICE_ALL,
-    ]:
-        print(f"Error: Invalid service {service}. Exiting")
-        sys.exit(1)
-    return True
-
-
@click.group(invoke_without_command=True)
@click.option("--version", is_flag=True, help="Show the archgw cli version and exit.")
@click.pass_context
@ -89,17 +70,11 @@ def main(ctx, version):


@click.command()
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Optional parameter to specify which service to build. Options are model_server, archgw",
-)
-def build(service):
+def build():
    """Build Arch from source. Must be in root of cloned repo."""
-    verify_service_name(service)

    # Check if /arch/Dockerfile exists
-    if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
+    if os.path.exists(ARCHGW_DOCKERFILE):
        if os.path.exists(ARCHGW_DOCKERFILE):
            click.echo("Building archgw image...")
            try:
@ -110,8 +85,6 @@ def build(service):
                        "-f",
                        ARCHGW_DOCKERFILE,
                        "-t",
-                        f"{KATANEMO_DOCKERHUB_REPO}:latest",
-                        "-t",
                        f"{ARCHGW_DOCKER_IMAGE}",
                        ".",
                        "--add-host=host.docker.internal:host-gateway",
@ -128,57 +101,20 @@ def build(service):

    click.echo("archgw image built successfully.")

-    """Install the model server dependencies using Poetry."""
-    if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
-        # Check if pyproject.toml exists
-        if os.path.exists(MODEL_SERVER_BUILD_FILE):
-            click.echo("Installing model server dependencies with Poetry...")
-            try:
-                subprocess.run(
-                    ["poetry", "install", "--no-cache"],
-                    cwd=os.path.dirname(MODEL_SERVER_BUILD_FILE),
-                    check=True,
-                )
-                click.echo("Model server dependencies installed successfully.")
-            except subprocess.CalledProcessError as e:
-                click.echo(f"Error installing model server dependencies: {e}")
-                sys.exit(1)
-        else:
-            click.echo(f"Error: pyproject.toml not found in {MODEL_SERVER_BUILD_FILE}")
-            sys.exit(1)
-

@click.command()
@click.argument("file", required=False)  # Optional file argument
@click.option(
    "--path", default=".", help="Path to the directory containing arch_config.yaml"
 )
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Service to start. Options are model_server, archgw.",
-)
@click.option(
    "--foreground",
    default=False,
    help="Run Arch in the foreground. Default is False",
    is_flag=True,
 )
-def up(file, path, service, foreground):
+def up(file, path, foreground):
    """Starts Arch."""
-    verify_service_name(service)
-
-    if service == SERVICE_ALL and foreground:
-        # foreground can only be specified when starting individual services
-        log.info("foreground flag is only supported for individual services. Exiting.")
-        sys.exit(1)
-
-    if service == SERVICE_NAME_MODEL_SERVER:
-        log.info("Download models from HuggingFace...")
-        download_models_from_hf()
-        start_arch_modelserver(foreground)
-        return
-
    # Use the utility function to find config file
    arch_config_file = find_config_file(path, file)

@ -202,7 +138,6 @@ def up(file, path, service, foreground):
    # Set the ARCH_CONFIG_FILE environment variable
    env_stage = {
        "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
-        "MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
    }
    env = os.environ.copy()
    # Remove PATH variable if present
@ -242,40 +177,13 @@ def up(file, path, service, foreground):
                    env_stage[access_key] = env_file_dict[access_key]

    env.update(env_stage)
-
-    if service == SERVICE_NAME_ARCHGW:
-        start_arch(arch_config_file, env, foreground=foreground)
-    else:
-        # Check if ingress_traffic listener is configured before starting model_server
-        if has_ingress_listener(arch_config_file):
-            download_models_from_hf()
-            start_arch_modelserver(foreground)
-        else:
-            log.info(
-                "Skipping model_server startup: no ingress_traffic listener configured in arch_config.yaml"
-            )
-
-        start_arch(arch_config_file, env, foreground=foreground)
+    start_arch(arch_config_file, env, foreground=foreground)


@click.command()
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Service to down. Options are all, model_server, archgw. Default is all",
-)
-def down(service):
+def down():
    """Stops Arch."""
-
-    verify_service_name(service)
-
-    if service == SERVICE_NAME_MODEL_SERVER:
-        stop_arch_modelserver()
-    elif service == SERVICE_NAME_ARCHGW:
-        stop_docker_container()
-    else:
-        stop_arch_modelserver()
-        stop_docker_container(SERVICE_NAME_ARCHGW)
+    stop_docker_container()


@click.command()
@ -303,7 +211,7 @@ def generate_prompt_targets(file):
@click.command()
@click.option(
    "--debug",
-    help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
+    help="For detailed debug logs to trace calls from archgw <> api_server, etc",
    is_flag=True,
 )
@click.option("--follow", help="Follow the logs", is_flag=True)
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -57,6 +57,10 @@ def convert_legacy_listeners(
        "timeout": "30s",
    }

+    # Handle None case
+    if listeners is None:
+        return [llm_gateway_listener], llm_gateway_listener, prompt_gateway_listener
+
    if isinstance(listeners, dict):
        # legacy listeners
        # check if type is array or object
@ -148,6 +152,24 @@ def get_llm_provider_access_keys(arch_config_file):
            if access_key is not None:
                access_key_list.append(access_key)

+    # Extract environment variables from state_storage.connection_string
+    state_storage = arch_config_yaml.get("state_storage_v1_responses")
+    if state_storage:
+        connection_string = state_storage.get("connection_string")
+        if connection_string and isinstance(connection_string, str):
+            # Extract all $VAR and ${VAR} patterns from connection string
+            import re
+
+            # Match both $VAR and ${VAR} patterns
+            pattern = r"\$\{?([A-Z_][A-Z0-9_]*)\}?"
+            matches = re.findall(pattern, connection_string)
+            for var in matches:
+                access_key_list.append(f"${var}")
+        else:
+            raise ValueError(
+                "Invalid connection string received in state_storage_v1_responses"
+            )
+
    return access_key_list


--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@ -1,34 +1,28 @@
-[project]
-name = "archgw"
-version = "0.3.18"
-description = "Python-based CLI tool to manage Arch Gateway."
-authors = [{ name = "Katanemo Labs, Inc." }]
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
-    "archgw_modelserver==0.3.18",
-    "click>=8.1.7,<9.0.0",
-    "jinja2>=3.1.4,<4.0.0",
-    "jsonschema>=4.23.0,<5.0.0",
-    "pyyaml>=6.0.2,<7.0.0",
-]
-
-[project.scripts]
-archgw = "cli.main:main"
-
-[dependency-groups]
-dev = [
-    "pytest>=8.4.1,<9.0.0",
-]
-
 [tool.poetry]
+name = "archgw"
+version = "0.3.22"
+description = "Python-based CLI tool to manage Arch Gateway."
+authors = ["Katanemo Labs, Inc."]
+readme = "README.md"
 packages = [{ include = "cli" }]
-dependencies = { archgw_modelserver = { path = "../../model_server", develop = true } }
+
+[tool.poetry.dependencies]
+python = ">=3.10"
+click = ">=8.1.7,<9.0.0"
+jinja2 = ">=3.1.4,<4.0.0"
+jsonschema = ">=4.23.0,<5.0.0"
+pyyaml = ">=6.0.2,<7.0.0"
+requests = ">=2.31.0,<3.0.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = ">=8.4.1,<9.0.0"
+
+[tool.poetry.scripts]
+archgw = "cli.main:main"

 [build-system]
 requires = ["poetry-core>=2.0.0"]
 build-backend = "poetry.core.masonry.api"

-
 [tool.pytest.ini_options]
 addopts = ["-v"]
--- a/arch/tools/test/test_config_generator.py
+++ b/arch/tools/test/test_config_generator.py
@ -94,21 +94,16 @@ def test_validate_and_render_happy_path_agent_config(monkeypatch):
 version: v0.3.0

 agents:
-  - name: query_rewriter
-    kind: openai
-    endpoint: http://localhost:10500
-  - name: context_builder
-    kind: openai
-    endpoint: http://localhost:10501
-  - name: response_generator
-    kind: openai
-    endpoint: http://localhost:10502
-  - name: research_agent
-    kind: openai
-    endpoint: http://localhost:10500
-  - name: input_guard_rails
-    kind: openai
-    endpoint: http://localhost:10503
+  - id: query_rewriter
+    url: http://localhost:10500
+  - id: context_builder
+    url: http://localhost:10501
+  - id: response_generator
+    url: http://localhost:10502
+  - id: research_agent
+    url: http://localhost:10500
+  - id: input_guard_rails
+    url: http://localhost:10503

 listeners:
  - name: tmobile
@ -156,7 +151,7 @@ listeners:
        mock.mock_open().return_value,  # ARCH_CONFIG_FILE_RENDERED (write)
    ]
    with mock.patch("builtins.open", m_open):
-        with mock.patch("config_generator.Environment"):
+        with mock.patch("cli.config_generator.Environment"):
            validate_and_render_schema()