Merge branch 'main' into musa/www

2026-06-26 15:39:40 +02:00 · 2025-12-18 14:53:56 -08:00 · 2025-12-18 14:53:56 -08:00 · a6f9ca3594
commit a6f9ca3594
parent 7f6ff00d27 48bbc7cce7
189 changed files with 21252 additions and 14516 deletions
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -14,6 +14,38 @@ properties:
    type: array
    items:
      type: object
+      properties:
+        id:
+          type: string
+        url:
+          type: string
+      additionalProperties: false
+      required:
+        - id
+        - url
+  filters:
+    type: array
+    items:
+      type: object
+      properties:
+        id:
+          type: string
+        url:
+          type: string
+        type:
+          type: string
+          enum:
+            - mcp
+        transport:
+          type: string
+          enum:
+            - streamable-http
+        tool:
+          type: string
+      additionalProperties: false
+      required:
+        - id
+        - url
  listeners:
    oneOf:
      - type: array
@ -331,6 +363,31 @@ properties:
      model:
        type: string
      additionalProperties: false
+  state_storage:
+    type: object
+    properties:
+      type:
+        type: string
+        enum:
+          - memory
+          - postgres
+      connection_string:
+        type: string
+        description: Required when type is postgres. Supports environment variable substitution using $VAR or ${VAR} syntax.
+    additionalProperties: false
+    required:
+      - type
+    # Note: connection_string is conditionally required based on type
+    # If type is 'postgres', connection_string must be provided
+    # If type is 'memory', connection_string is not needed
+    allOf:
+      - if:
+          properties:
+            type:
+              const: postgres
+        then:
+          required:
+            - connection_string
  prompt_guards:
    type: object
    properties:
--- a/arch/docker-compose.dev.yaml
+++ b/arch/docker-compose.dev.yaml
@ -22,4 +22,3 @@ services:
      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
-      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -51,11 +51,11 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: archgw(inbound)
+                      service_name: plano(inbound)
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: ingress_traffic
+                stat_prefix: plano(inbound)
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -95,21 +95,6 @@ static_resources:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
-                generate_request_id: true
-                tracing:
-                  provider:
-                    name: envoy.tracers.opentelemetry
-                    typed_config:
-                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
-                      grpc_service:
-                        envoy_grpc:
-                          cluster_name: opentelemetry_collector
-                        timeout: 0.250s
-                      service_name: ingress_traffic
-                  random_sampling:
-                    value: {{ arch_tracing.random_sampling }}
-                {% endif %}
                stat_prefix: ingress_traffic
                codec_type: AUTO
                scheme_header_transformation:
@ -221,7 +206,7 @@ static_resources:
    - name: outbound_api_traffic
      address:
        socket_address:
-          address: 0.0.0.0
+          address: 127.0.0.1
          port_value: 11000
      traffic_direction: OUTBOUND
      filter_chains:
@ -229,21 +214,21 @@ static_resources:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
-                generate_request_id: true
-                tracing:
-                  provider:
-                    name: envoy.tracers.opentelemetry
-                    typed_config:
-                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
-                      grpc_service:
-                        envoy_grpc:
-                          cluster_name: opentelemetry_collector
-                        timeout: 0.250s
-                      service_name: outbound_api_traffic
-                  random_sampling:
-                    value: {{ arch_tracing.random_sampling }}
-                {% endif %}
+                # {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
+                # generate_request_id: true
+                # tracing:
+                #   provider:
+                #     name: envoy.tracers.opentelemetry
+                #     typed_config:
+                #       "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
+                #       grpc_service:
+                #         envoy_grpc:
+                #           cluster_name: opentelemetry_collector
+                #         timeout: 0.250s
+                #       service_name: tools
+                #   random_sampling:
+                #     value: {{ arch_tracing.random_sampling }}
+                # {% endif %}
                stat_prefix: outbound_api_traffic
                codec_type: AUTO
                scheme_header_transformation:
@ -262,19 +247,16 @@ static_resources:
                      domains:
                        - "*"
                      routes:
-                        {% for internal_cluster in ["arch_fc", "model_server"] %}
                        - match:
                            prefix: "/"
                            headers:
                              - name: "x-arch-upstream"
                                string_match:
-                                  exact: {{ internal_cluster }}
+                                  exact: bright_staff
                          route:
                            auto_host_rewrite: true
-                            cluster: {{ internal_cluster }}
+                            cluster: bright_staff
                            timeout: 300s
-                        {% endfor %}
-
                        {% for cluster_name, cluster in arch_clusters.items() %}
                        - match:
                            prefix: "/"
@ -317,7 +299,7 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: arch_gateway
+                      service_name: plano(inbound)
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
@ -416,7 +398,7 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: archgw(outbound)
+                      service_name: plano(outbound)
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
@ -487,6 +469,50 @@ static_resources:
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

+{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
+    - name: otel_collector_proxy
+      address:
+        socket_address:
+          address: 127.0.0.1
+          port_value: 9903
+      traffic_direction: OUTBOUND
+      filter_chains:
+        - filters:
+            - name: envoy.filters.network.http_connection_manager
+              typed_config:
+                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                stat_prefix: otel_proxy
+                codec_type: AUTO
+                access_log:
+                - name: envoy.access_loggers.file
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                    path: "/var/log/access_otel.log"
+                    format: |
+                     [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
+                route_config:
+                  name: otel_route
+                  virtual_hosts:
+                    - name: otel_backend
+                      domains: ["*"]
+                      routes:
+                        - match:
+                            prefix: "/v1/traces"
+                          route:
+                            cluster: opentelemetry_collector_http
+                            timeout: 5s
+                            retry_policy:
+                              retry_on: "5xx,connect-failure,refused-stream,reset"
+                              num_retries: 3
+                              per_try_timeout: 2s
+                              host_selection_retry_max_attempts: 5
+                              retriable_status_codes: [500, 502, 503, 504]
+                http_filters:
+                  - name: envoy.filters.http.router
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+{% endif %}
+
    - name: egress_traffic_llm
      address:
        socket_address:
@ -599,7 +625,7 @@ static_resources:
  clusters:

    - name: arch
-      connect_timeout: 0.5s
+      connect_timeout: 5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -868,24 +894,6 @@ static_resources:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
-
-    {% for internal_cluster in ["arch_fc", "model_server"] %}
-    - name: {{ internal_cluster }}
-      connect_timeout: 0.5s
-      type: STRICT_DNS
-      dns_lookup_family: V4_ONLY
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: {{ internal_cluster }}
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 51000
-                  hostname: {{ internal_cluster }}
-    {% endfor %}
    - name: mistral_7b_instruct
      connect_timeout: 0.5s
      type: STRICT_DNS
@ -1035,7 +1043,6 @@ static_resources:
                      port_value: 12001
                  hostname: arch_listener_llm

-
 {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
    - name: opentelemetry_collector
      type: STRICT_DNS
@ -1069,4 +1076,19 @@ static_resources:
                socket_address:
                  address: host.docker.internal
                  port_value: 4318
+          # Circuit breaker configuration to prevent overwhelming OTEL collector
+      circuit_breakers:
+        thresholds:
+          - priority: DEFAULT
+            max_connections: 100
+            max_pending_requests: 100
+            max_requests: 100
+            max_retries: 3
+      # Health checking and outlier detection
+      outlier_detection:
+        consecutive_5xx: 5
+        interval: 10s
+        base_ejection_time: 30s
+        max_ejection_percent: 50
+        enforcing_consecutive_5xx: 100
 {% endif %}
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@ -2,7 +2,7 @@
 nodaemon=true

 [program:brightstaff]
-command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
+command=sh -c "envsubst < /app/arch_config_rendered.yaml > /app/arch_config_rendered.env_sub.yaml && RUST_LOG=debug ARCH_CONFIG_PATH_RENDERED=/app/arch_config_rendered.env_sub.yaml /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@ -19,7 +19,7 @@ source venv/bin/activate

 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.3.18
+pip install archgw==0.3.22
 ```

 ## Uninstall Instructions: archgw CLI
@ -56,15 +56,8 @@ poetry install
 archgw build
 ```

-### Step 5: download models
-This will help download models so model_server can load faster. This should be done once.
-
-```bash
-archgw download-models
-```
-
 ### Logs
-`archgw` command can also view logs from gateway and model_server. Use following command to view logs,
+`archgw` command can also view logs from the gateway. Use following command to view logs,

 ```bash
 archgw logs --follow
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -13,10 +13,10 @@ SUPPORTED_PROVIDERS_WITH_BASE_URL = [
    "ollama",
    "qwen",
    "amazon_bedrock",
+    "arch",
 ]

 SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
-    "arch",
    "deepseek",
    "groq",
    "mistral",
@ -101,8 +101,17 @@ def validate_and_render_schema():

    # Process agents section and convert to endpoints
    agents = config_yaml.get("agents", [])
-    for agent in agents:
+    filters = config_yaml.get("filters", [])
+    agents_combined = agents + filters
+    agent_id_keys = set()
+
+    for agent in agents_combined:
        agent_id = agent.get("id")
+        if agent_id in agent_id_keys:
+            raise Exception(
+                f"Duplicate agent id {agent_id}, please provide unique id for each agent"
+            )
+        agent_id_keys.add(agent_id)
        agent_endpoint = agent.get("url")

        if agent_id and agent_endpoint:
@ -304,6 +313,16 @@ def validate_and_render_schema():
                }
            )

+    # Always add arch-function model provider if not already defined
+    if "arch-function" not in model_provider_name_set:
+        updated_model_providers.append(
+            {
+                "name": "arch-function",
+                "provider_interface": "arch",
+                "model": "Arch-Function",
+            }
+        )
+
    config_yaml["model_providers"] = deepcopy(updated_model_providers)

    listeners_with_provider = 0
--- a/arch/tools/cli/consts.py
+++ b/arch/tools/cli/consts.py
@ -1,13 +1,5 @@
 import os

-
-KATANEMO_DOCKERHUB_REPO = "katanemo/archgw"
-KATANEMO_LOCAL_MODEL_LIST = [
-    "katanemo/Arch-Guard",
-]
 SERVICE_NAME_ARCHGW = "archgw"
-SERVICE_NAME_MODEL_SERVER = "model_server"
-SERVICE_ALL = "all"
-MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
 ARCHGW_DOCKER_NAME = "archgw"
-ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.18")
+ARCHGW_DOCKER_IMAGE = os.getenv("ARCHGW_DOCKER_IMAGE", "katanemo/archgw:0.3.22")
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -9,9 +9,7 @@ from cli.utils import convert_legacy_listeners, getLogger
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
-    KATANEMO_LOCAL_MODEL_LIST,
 )
-from huggingface_hub import snapshot_download
 import subprocess
 from cli.docker_cli import (
    docker_container_status,
@ -144,49 +142,6 @@ def stop_docker_container(service=ARCHGW_DOCKER_NAME):
        log.info(f"Failed to shut down services: {str(e)}")


-def download_models_from_hf():
-    for model in KATANEMO_LOCAL_MODEL_LIST:
-        log.info(f"Downloading model: {model}")
-        snapshot_download(repo_id=model)
-
-
-def start_arch_modelserver(foreground):
-    """
-    Start the model server. This assumes that the archgw_modelserver package is installed locally
-
-    """
-    try:
-        log.info("archgw_modelserver restart")
-        if foreground:
-            subprocess.run(
-                ["archgw_modelserver", "start", "--foreground"],
-                check=True,
-            )
-        else:
-            subprocess.run(
-                ["archgw_modelserver", "start"],
-                check=True,
-            )
-    except subprocess.CalledProcessError as e:
-        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
-        sys.exit(1)
-
-
-def stop_arch_modelserver():
-    """
-    Stop the model server. This assumes that the archgw_modelserver package is installed locally
-
-    """
-    try:
-        subprocess.run(
-            ["archgw_modelserver", "stop"],
-            check=True,
-        )
-    except subprocess.CalledProcessError as e:
-        log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
-        sys.exit(1)
-
-
 def start_cli_agent(arch_config_file=None, settings_json="{}"):
    """Start a CLI client connected to Arch."""

--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@ -20,20 +20,14 @@ from cli.utils import (
    find_config_file,
 )
 from cli.core import (
-    start_arch_modelserver,
-    stop_arch_modelserver,
    start_arch,
    stop_docker_container,
-    download_models_from_hf,
    start_cli_agent,
 )
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
-    KATANEMO_DOCKERHUB_REPO,
    SERVICE_NAME_ARCHGW,
-    SERVICE_NAME_MODEL_SERVER,
-    SERVICE_ALL,
 )

 log = getLogger(__name__)
@ -47,9 +41,8 @@ logo = r"""

 """

-# Command to build archgw and model_server Docker images
+# Command to build archgw Docker images
 ARCHGW_DOCKERFILE = "./arch/Dockerfile"
-MODEL_SERVER_BUILD_FILE = "./model_server/pyproject.toml"


 def get_version():
@ -60,18 +53,6 @@ def get_version():
        return "version not found"


-def verify_service_name(service):
-    """Verify if the service name is valid."""
-    if service not in [
-        SERVICE_NAME_ARCHGW,
-        SERVICE_NAME_MODEL_SERVER,
-        SERVICE_ALL,
-    ]:
-        print(f"Error: Invalid service {service}. Exiting")
-        sys.exit(1)
-    return True
-
-
@click.group(invoke_without_command=True)
@click.option("--version", is_flag=True, help="Show the archgw cli version and exit.")
@click.pass_context
@ -89,17 +70,11 @@ def main(ctx, version):


@click.command()
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Optional parameter to specify which service to build. Options are model_server, archgw",
-)
-def build(service):
+def build():
    """Build Arch from source. Must be in root of cloned repo."""
-    verify_service_name(service)

    # Check if /arch/Dockerfile exists
-    if service == SERVICE_NAME_ARCHGW or service == SERVICE_ALL:
+    if os.path.exists(ARCHGW_DOCKERFILE):
        if os.path.exists(ARCHGW_DOCKERFILE):
            click.echo("Building archgw image...")
            try:
@ -110,8 +85,6 @@ def build(service):
                        "-f",
                        ARCHGW_DOCKERFILE,
                        "-t",
-                        f"{KATANEMO_DOCKERHUB_REPO}:latest",
-                        "-t",
                        f"{ARCHGW_DOCKER_IMAGE}",
                        ".",
                        "--add-host=host.docker.internal:host-gateway",
@ -128,57 +101,20 @@ def build(service):

    click.echo("archgw image built successfully.")

-    """Install the model server dependencies using Poetry."""
-    if service == SERVICE_NAME_MODEL_SERVER or service == SERVICE_ALL:
-        # Check if pyproject.toml exists
-        if os.path.exists(MODEL_SERVER_BUILD_FILE):
-            click.echo("Installing model server dependencies with Poetry...")
-            try:
-                subprocess.run(
-                    ["poetry", "install", "--no-cache"],
-                    cwd=os.path.dirname(MODEL_SERVER_BUILD_FILE),
-                    check=True,
-                )
-                click.echo("Model server dependencies installed successfully.")
-            except subprocess.CalledProcessError as e:
-                click.echo(f"Error installing model server dependencies: {e}")
-                sys.exit(1)
-        else:
-            click.echo(f"Error: pyproject.toml not found in {MODEL_SERVER_BUILD_FILE}")
-            sys.exit(1)
-

@click.command()
@click.argument("file", required=False)  # Optional file argument
@click.option(
    "--path", default=".", help="Path to the directory containing arch_config.yaml"
 )
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Service to start. Options are model_server, archgw.",
-)
@click.option(
    "--foreground",
    default=False,
    help="Run Arch in the foreground. Default is False",
    is_flag=True,
 )
-def up(file, path, service, foreground):
+def up(file, path, foreground):
    """Starts Arch."""
-    verify_service_name(service)
-
-    if service == SERVICE_ALL and foreground:
-        # foreground can only be specified when starting individual services
-        log.info("foreground flag is only supported for individual services. Exiting.")
-        sys.exit(1)
-
-    if service == SERVICE_NAME_MODEL_SERVER:
-        log.info("Download models from HuggingFace...")
-        download_models_from_hf()
-        start_arch_modelserver(foreground)
-        return
-
    # Use the utility function to find config file
    arch_config_file = find_config_file(path, file)

@ -202,7 +138,6 @@ def up(file, path, service, foreground):
    # Set the ARCH_CONFIG_FILE environment variable
    env_stage = {
        "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
-        "MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
    }
    env = os.environ.copy()
    # Remove PATH variable if present
@ -242,40 +177,13 @@ def up(file, path, service, foreground):
                    env_stage[access_key] = env_file_dict[access_key]

    env.update(env_stage)
-
-    if service == SERVICE_NAME_ARCHGW:
-        start_arch(arch_config_file, env, foreground=foreground)
-    else:
-        # Check if ingress_traffic listener is configured before starting model_server
-        if has_ingress_listener(arch_config_file):
-            download_models_from_hf()
-            start_arch_modelserver(foreground)
-        else:
-            log.info(
-                "Skipping model_server startup: no ingress_traffic listener configured in arch_config.yaml"
-            )
-
-        start_arch(arch_config_file, env, foreground=foreground)
+    start_arch(arch_config_file, env, foreground=foreground)


@click.command()
-@click.option(
-    "--service",
-    default=SERVICE_ALL,
-    help="Service to down. Options are all, model_server, archgw. Default is all",
-)
-def down(service):
+def down():
    """Stops Arch."""
-
-    verify_service_name(service)
-
-    if service == SERVICE_NAME_MODEL_SERVER:
-        stop_arch_modelserver()
-    elif service == SERVICE_NAME_ARCHGW:
-        stop_docker_container()
-    else:
-        stop_arch_modelserver()
-        stop_docker_container(SERVICE_NAME_ARCHGW)
+    stop_docker_container()


@click.command()
@ -303,7 +211,7 @@ def generate_prompt_targets(file):
@click.command()
@click.option(
    "--debug",
-    help="For detailed debug logs to trace calls from archgw <> model_server <> api_server, etc",
+    help="For detailed debug logs to trace calls from archgw <> api_server, etc",
    is_flag=True,
 )
@click.option("--follow", help="Follow the logs", is_flag=True)
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -57,6 +57,10 @@ def convert_legacy_listeners(
        "timeout": "30s",
    }

+    # Handle None case
+    if listeners is None:
+        return [llm_gateway_listener], llm_gateway_listener, prompt_gateway_listener
+
    if isinstance(listeners, dict):
        # legacy listeners
        # check if type is array or object
@ -148,6 +152,24 @@ def get_llm_provider_access_keys(arch_config_file):
            if access_key is not None:
                access_key_list.append(access_key)

+    # Extract environment variables from state_storage.connection_string
+    state_storage = arch_config_yaml.get("state_storage_v1_responses")
+    if state_storage:
+        connection_string = state_storage.get("connection_string")
+        if connection_string and isinstance(connection_string, str):
+            # Extract all $VAR and ${VAR} patterns from connection string
+            import re
+
+            # Match both $VAR and ${VAR} patterns
+            pattern = r"\$\{?([A-Z_][A-Z0-9_]*)\}?"
+            matches = re.findall(pattern, connection_string)
+            for var in matches:
+                access_key_list.append(f"${var}")
+        else:
+            raise ValueError(
+                "Invalid connection string received in state_storage_v1_responses"
+            )
+
    return access_key_list


--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@ -1,34 +1,28 @@
-[project]
-name = "archgw"
-version = "0.3.18"
-description = "Python-based CLI tool to manage Arch Gateway."
-authors = [{ name = "Katanemo Labs, Inc." }]
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
-    "archgw_modelserver==0.3.18",
-    "click>=8.1.7,<9.0.0",
-    "jinja2>=3.1.4,<4.0.0",
-    "jsonschema>=4.23.0,<5.0.0",
-    "pyyaml>=6.0.2,<7.0.0",
-]
-
-[project.scripts]
-archgw = "cli.main:main"
-
-[dependency-groups]
-dev = [
-    "pytest>=8.4.1,<9.0.0",
-]
-
 [tool.poetry]
+name = "archgw"
+version = "0.3.22"
+description = "Python-based CLI tool to manage Arch Gateway."
+authors = ["Katanemo Labs, Inc."]
+readme = "README.md"
 packages = [{ include = "cli" }]
-dependencies = { archgw_modelserver = { path = "../../model_server", develop = true } }
+
+[tool.poetry.dependencies]
+python = ">=3.10"
+click = ">=8.1.7,<9.0.0"
+jinja2 = ">=3.1.4,<4.0.0"
+jsonschema = ">=4.23.0,<5.0.0"
+pyyaml = ">=6.0.2,<7.0.0"
+requests = ">=2.31.0,<3.0.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = ">=8.4.1,<9.0.0"
+
+[tool.poetry.scripts]
+archgw = "cli.main:main"

 [build-system]
 requires = ["poetry-core>=2.0.0"]
 build-backend = "poetry.core.masonry.api"

-
 [tool.pytest.ini_options]
 addopts = ["-v"]
--- a/arch/tools/test/test_config_generator.py
+++ b/arch/tools/test/test_config_generator.py
@ -94,21 +94,16 @@ def test_validate_and_render_happy_path_agent_config(monkeypatch):
 version: v0.3.0

 agents:
-  - name: query_rewriter
-    kind: openai
-    endpoint: http://localhost:10500
-  - name: context_builder
-    kind: openai
-    endpoint: http://localhost:10501
-  - name: response_generator
-    kind: openai
-    endpoint: http://localhost:10502
-  - name: research_agent
-    kind: openai
-    endpoint: http://localhost:10500
-  - name: input_guard_rails
-    kind: openai
-    endpoint: http://localhost:10503
+  - id: query_rewriter
+    url: http://localhost:10500
+  - id: context_builder
+    url: http://localhost:10501
+  - id: response_generator
+    url: http://localhost:10502
+  - id: research_agent
+    url: http://localhost:10500
+  - id: input_guard_rails
+    url: http://localhost:10503

 listeners:
  - name: tmobile
@ -156,7 +151,7 @@ listeners:
        mock.mock_open().return_value,  # ARCH_CONFIG_FILE_RENDERED (write)
    ]
    with mock.patch("builtins.open", m_open):
-        with mock.patch("config_generator.Environment"):
+        with mock.patch("cli.config_generator.Environment"):
            validate_and_render_schema()